Skip to content

Commit 417886f

Browse files
authored
Merge pull request #469 from openforcefield/unique-atom-names
Ensure atom names are unique after construction
2 parents baefccc + d965768 commit 417886f

File tree

6 files changed

+270
-44
lines changed

6 files changed

+270
-44
lines changed

docs/releasehistory.rst

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,38 @@ Releases follow the ``major.minor.micro`` scheme recommended by `PEP440 <https:/
77
* ``minor`` increments add features but do not break API compatibility
88
* ``micro`` increments represent bugfix releases or improvements in documentation
99

10+
0.6.1 - Bugfixes
11+
----------------
12+
13+
New features
14+
""""""""""""
15+
- `PR #469 <https://github.com/openforcefield/openforcefield/pull/469>`_:
16+
The :py:class:`Molecule <openforcefield.topology.Molecule>` adds
17+
:py:meth:`Molecule.has_unique_atom_names <openforcefield.topology.Molecule.has_unique_atom_names>`
18+
and :py:meth:`Molecule.has_unique_atom_names <openforcefield.topology.Molecule.generate_unique_atom_names>`.
19+
20+
Behavior changed
21+
""""""""""""""""
22+
- `PR #469 <https://github.com/openforcefield/openforcefield/pull/469>`_:
23+
When running :py:meth:`Topology.to_openmm <openforcefield.topology.Topology.to_openmm>`, unique atom names
24+
are generated (overriding any existing atom names) if the provided atom names are not unique. This
25+
uniqueness extends only to atoms in the same molecule. To disable this behavior, set the kwarg
26+
``ensure_unique_atom_names=False``.
27+
28+
Tests added
29+
"""""""""""
30+
- `PR #469 <https://github.com/openforcefield/openforcefield/pull/469>`_: Added round-trip SMILES test
31+
to add coverage for :py:meth:`Molecule.from_smiles <openforcefield.topology.Molecule.from_smiles>`.
32+
- `PR #469 <https://github.com/openforcefield/openforcefield/pull/469>`_: Added tests for unique atom
33+
naming behavior in :py:meth:`Topology.to_openmm <openforcefield.topology.Topology.to_openmm>`, as
34+
well as tests of the ``ensure_unique_atom_names=False`` kwarg disabling this behavior.
35+
36+
Bugfixes
37+
""""""""
38+
- `Issue #460 <https://github.com/openforcefield/openforcefield/issues/460>`_: Creates unique atom
39+
names in :py:meth:`Topology.to_openmm <openforcefield.topology.Topology.to_openmm>` if the existing
40+
ones are not unique. The lack of unique atom names caused problems in workflows involving
41+
downstream tools that expect unique atom names.
1042

1143
0.6.0 - Library Charges
1244
-----------------------
@@ -90,10 +122,10 @@ Behavior changed
90122

91123
Tests added
92124
"""""""""""
93-
- `PR #430 <https://github.com/openforcefield/openforcefield/pull/430>`_: Added test for
94-
Wiberg Bond Order implemented in OpenEye Toolkits. Molecules taken from
125+
- `PR #430 <https://github.com/openforcefield/openforcefield/pull/430>`_: Added test for
126+
Wiberg Bond Order implemented in OpenEye Toolkits. Molecules taken from
95127
DOI:10.5281/zenodo.3405489 . Added by Sukanya Sasmal.
96-
128+
97129

98130
Bugfixes
99131
""""""""
@@ -474,7 +506,7 @@ API-breaking Changes
474506
* `PR #291 <https://github.com/openforcefield/openforcefield/pull/291>`_: Remove ``ForceField.load/to_smirnoff_data``, add ``ForceField.to_file/string`` and ``ParameterHandler.add_parameters``. Change behavior of ``ForceField.register_X_handler`` functions.
475507

476508
Bugfixes
477-
""""""""
509+
""""""""
478510
* `PR #327 <https://github.com/openforcefield/openforcefield/pull/327>`_: Fix units in tip3p.offxml (note that this file is still not loadable by current toolkit)
479511
* `PR #325 <https://github.com/openforcefield/openforcefield/pull/325>`_: Fix solvent box for provided test system to resolve periodic clashes.
480512
* `PR #325 <https://github.com/openforcefield/openforcefield/pull/325>`_: Add informative message containing Hill formula when a molecule can't be matched in ``Topology.from_openmm``.

openforcefield/tests/test_forcefield.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ def test_parameterize_ethanol_missing_torsion(self):
746746
topology = Topology.from_openmm(pdbfile.topology, unique_molecules=molecules)
747747
with pytest.raises(UnassignedProperTorsionParameterException,
748748
match='- Topology indices [(]5, 0, 1, 6[)]: '
749-
'names and elements [(] H[)], [(] C[)], [(] C[)], [(] H[)],') \
749+
'names and elements [(](H\d+)? H[)], [(](C\d+)? C[)], [(](C\d+)? C[)], [(](H\d+)? H[)],') \
750750
as excinfo:
751751
omm_system = forcefield.create_openmm_system(topology)
752752

openforcefield/tests/test_molecule.py

Lines changed: 93 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,26 @@ def mini_drug_bank(xfail_mols=None, wip_mols=None):
192192
# used inside pytest.mark.parametrize (see issue #349 in pytest).
193193
mini_drug_bank.molecules = None
194194

195+
# All the molecules that raise UndefinedStereochemistryError when read by OETK()
196+
openeye_drugbank_undefined_stereo_mols = {'DrugBank_1634', 'DrugBank_1700', 'DrugBank_1962',
197+
'DrugBank_2519', 'DrugBank_2987', 'DrugBank_3502',
198+
'DrugBank_3930', 'DrugBank_4161', 'DrugBank_4162',
199+
'DrugBank_5043', 'DrugBank_5418', 'DrugBank_6531'}
200+
201+
# All the molecules that raise UndefinedStereochemistryError when read by OETK().
202+
# Note that this list is different from that for OEMol,
203+
# since the toolkits have different definitions of "stereogenic"
204+
rdkit_drugbank_undefined_stereo_mols = {'DrugBank_1634', 'DrugBank_1962', 'DrugBank_2519',
205+
'DrugBank_3930', 'DrugBank_5043', 'DrugBank_5418'}
206+
207+
208+
# Missing stereo in OE but not RDK: 'DrugBank_2987', 'DrugBank_3502', 'DrugBank_4161',
209+
# 'DrugBank_4162', 'DrugBank_6531', 'DrugBank_1700',
210+
211+
# Some molecules are _valid_ in both OETK and RDKit, but will fail if you try
212+
# to convert from one to the other, since OE adds stereo that RDKit doesn't
213+
drugbank_stereogenic_in_oe_but_not_rdkit = {'DrugBank_1598', 'DrugBank_4346', 'DrugBank_1849',
214+
'DrugBank_2141'}
195215

196216
#=============================================================================================
197217
# TESTS
@@ -260,6 +280,55 @@ def test_create_copy(self, molecule):
260280
molecule_copy = Molecule(molecule)
261281
assert molecule_copy == molecule
262282

283+
@pytest.mark.parametrize('toolkit', [OpenEyeToolkitWrapper, RDKitToolkitWrapper])
284+
@pytest.mark.parametrize('molecule', mini_drug_bank())
285+
def test_to_from_smiles(self, molecule, toolkit):
286+
"""Test round-trip creation from SMILES"""
287+
if not toolkit.is_available():
288+
pytest.skip('Required toolkit is unavailable')
289+
290+
if toolkit == RDKitToolkitWrapper:
291+
# Skip the test if OpenEye assigns stereochemistry but RDKit doesn't (since then, the
292+
# OFF molecule will be loaded, but fail to convert in to_rdkit)
293+
if molecule.name in drugbank_stereogenic_in_oe_but_not_rdkit:
294+
pytest.skip('Molecle is stereogenic in OpenEye (which loaded this dataset), but not RDKit, so it '
295+
'is impossible to make a valid RDMol in this test')
296+
undefined_stereo_mols = rdkit_drugbank_undefined_stereo_mols
297+
elif toolkit == OpenEyeToolkitWrapper:
298+
undefined_stereo_mols = openeye_drugbank_undefined_stereo_mols
299+
300+
toolkit_wrapper = toolkit()
301+
302+
undefined_stereo = molecule.name in undefined_stereo_mols
303+
304+
smiles1 = molecule.to_smiles(toolkit_registry=toolkit_wrapper)
305+
if undefined_stereo:
306+
molecule2 = Molecule.from_smiles(smiles1,
307+
allow_undefined_stereo=True,
308+
toolkit_registry=toolkit_wrapper)
309+
else:
310+
molecule2 = Molecule.from_smiles(smiles1,
311+
toolkit_registry=toolkit_wrapper)
312+
smiles2 = molecule2.to_smiles(toolkit_registry=toolkit_wrapper)
313+
assert (smiles1 == smiles2)
314+
315+
@pytest.mark.parametrize('molecule', mini_drug_bank())
316+
def test_unique_atom_names(self, molecule):
317+
"""Test molecules have unique atom names"""
318+
# The dataset we load in has atom names, so let's strip them first
319+
# to ensure that we can fail the uniqueness check
320+
for atom in molecule.atoms:
321+
atom.name = ''
322+
assert not(molecule.has_unique_atom_names)
323+
# Then genreate unique atom names using the built in algorithm
324+
molecule.generate_unique_atom_names()
325+
# Check that the molecule has unique atom names
326+
assert molecule.has_unique_atom_names
327+
# Check molecule.has_unique_atom_names is working correctly
328+
assert ((len(set([atom.name for atom in molecule.atoms])) == molecule.n_atoms) == molecule.has_unique_atom_names)
329+
molecule.atoms[1].name = molecule.atoms[0].name # no longer unique
330+
assert ((len(set([atom.name for atom in molecule.atoms])) == molecule.n_atoms) == molecule.has_unique_atom_names)
331+
263332
# TODO: Should there be an equivalent toolkit test and leave this as an integration test?
264333
@pytest.mark.slow
265334
def test_create_from_file(self):
@@ -310,13 +379,7 @@ def test_to_from_rdkit(self, molecule):
310379
# import pickle
311380
from openforcefield.utils.toolkits import UndefinedStereochemistryError
312381

313-
# DrugBank test set known failures. Note that this list is different from that for OEMol,
314-
# since the toolkits have different definitions of "stereogenic"
315-
# Stereogenic in OE but not RDK: 'DrugBank_2987', 'DrugBank_3502', 'DrugBank_4161',
316-
# 'DrugBank_4162', 'DrugBank_6531', 'DrugBank_1700',
317-
undefined_stereo_mols = {'DrugBank_1634', 'DrugBank_1962', 'DrugBank_2519',
318-
'DrugBank_3930', 'DrugBank_5043', 'DrugBank_5418'}
319-
undefined_stereo = molecule.name in undefined_stereo_mols
382+
undefined_stereo = molecule.name in rdkit_drugbank_undefined_stereo_mols
320383

321384
toolkit_wrapper = RDKitToolkitWrapper()
322385

@@ -385,22 +448,24 @@ def test_to_from_iupac(self, molecule):
385448
from openforcefield.utils.toolkits import UndefinedStereochemistryError
386449

387450
# All the molecules that raise UndefinedStereochemistryError in Molecule.from_iupac()
388-
undefined_stereo_mols = {'DrugBank_977', 'DrugBank_1634', 'DrugBank_1700', 'DrugBank_1962',
389-
'DrugBank_2148', 'DrugBank_2178', 'DrugBank_2186', 'DrugBank_2208',
390-
'DrugBank_2519', 'DrugBank_2538', 'DrugBank_2592', 'DrugBank_2651',
391-
'DrugBank_2987', 'DrugBank_3332', 'DrugBank_3502', 'DrugBank_3622',
392-
'DrugBank_3726', 'DrugBank_3844', 'DrugBank_3930', 'DrugBank_4161',
393-
'DrugBank_4162', 'DrugBank_4778', 'DrugBank_4593', 'DrugBank_4959',
394-
'DrugBank_5043', 'DrugBank_5076', 'DrugBank_5176', 'DrugBank_5418',
395-
'DrugBank_5737', 'DrugBank_5902', 'DrugBank_6304', 'DrugBank_6305',
396-
'DrugBank_6329', 'DrugBank_6355', 'DrugBank_6401', 'DrugBank_6509',
397-
'DrugBank_6531', 'DrugBank_6647',
398-
399-
# These test cases are allowed to fail.
400-
'DrugBank_390', 'DrugBank_810', 'DrugBank_4316', 'DrugBank_4346',
401-
'DrugBank_7124'
402-
}
403-
undefined_stereo = molecule.name in undefined_stereo_mols
451+
# (This is a larger list than the normal group of undefined stereo mols, probably has
452+
# something to do with IUPAC information content)
453+
iupac_problem_mols = {'DrugBank_977', 'DrugBank_1634', 'DrugBank_1700', 'DrugBank_1962',
454+
'DrugBank_2148', 'DrugBank_2178', 'DrugBank_2186', 'DrugBank_2208',
455+
'DrugBank_2519', 'DrugBank_2538', 'DrugBank_2592', 'DrugBank_2651',
456+
'DrugBank_2987', 'DrugBank_3332', 'DrugBank_3502', 'DrugBank_3622',
457+
'DrugBank_3726', 'DrugBank_3844', 'DrugBank_3930', 'DrugBank_4161',
458+
'DrugBank_4162', 'DrugBank_4778', 'DrugBank_4593', 'DrugBank_4959',
459+
'DrugBank_5043', 'DrugBank_5076', 'DrugBank_5176', 'DrugBank_5418',
460+
'DrugBank_5737', 'DrugBank_5902', 'DrugBank_6304', 'DrugBank_6305',
461+
'DrugBank_6329', 'DrugBank_6355', 'DrugBank_6401', 'DrugBank_6509',
462+
'DrugBank_6531', 'DrugBank_6647',
463+
464+
# These test cases are allowed to fail.
465+
'DrugBank_390', 'DrugBank_810', 'DrugBank_4316', 'DrugBank_4346',
466+
'DrugBank_7124'
467+
}
468+
undefined_stereo = molecule.name in iupac_problem_mols
404469

405470
iupac = molecule.to_iupac()
406471

@@ -423,7 +488,7 @@ def test_to_from_topology(self, molecule):
423488
@pytest.mark.parametrize('format', [
424489
'mol2',
425490
'sdf',
426-
pytest.param('pdb', marks=pytest.mark.wip(reason='Read from pdb has not bee implemented properly yet'))
491+
pytest.param('pdb', marks=pytest.mark.wip(reason='Read from pdb has not been implemented properly yet'))
427492
])
428493
def test_to_from_file(self, molecule, format):
429494
"""Test that conversion/creation of a molecule to and from a file is consistent."""
@@ -463,12 +528,7 @@ def test_to_from_oemol(self, molecule):
463528
# known_failures = {'ZINC05964684', 'ZINC05885163', 'ZINC05543156', 'ZINC17211981',
464529
# 'ZINC17312986', 'ZINC06424847', 'ZINC04963126'}
465530

466-
# DrugBank test set known failures.
467-
undefined_stereo_mols = {'DrugBank_1634', 'DrugBank_1700', 'DrugBank_1962',
468-
'DrugBank_2519', 'DrugBank_2987', 'DrugBank_3502',
469-
'DrugBank_3930', 'DrugBank_4161', 'DrugBank_4162',
470-
'DrugBank_5043', 'DrugBank_5418', 'DrugBank_6531'}
471-
undefined_stereo = molecule.name in undefined_stereo_mols
531+
undefined_stereo = molecule.name in openeye_drugbank_undefined_stereo_mols
472532

473533
toolkit_wrapper = OpenEyeToolkitWrapper()
474534

@@ -730,9 +790,9 @@ def test_add_virtual_site_units(self, molecule):
730790
"""
731791
Tests the unit type checking of the VirtualSite base class
732792
"""
733-
793+
734794
# TODO: Should these be using BondChargeVirtualSite, or should we just call the base class (which does the unit checks) directly?
735-
795+
736796
# Prepare values for unit checks
737797
distance_unitless = 0.4
738798
sigma_unitless = 0.1
@@ -786,7 +846,7 @@ def test_add_virtual_site_units(self, molecule):
786846
molecule.add_bond_charge_virtual_site([atom1, atom2, atom3], distance, charge_increments=charge_increments)
787847

788848
vsite3_index = molecule.add_bond_charge_virtual_site([atom1, atom2, atom3, atom4], distance, charge_increments=charge_increments)
789-
849+
790850
@pytest.mark.parametrize('molecule', mini_drug_bank())
791851
def test_add_bond_charge_virtual_site(self, molecule):
792852
"""Test the addition of a BondChargeVirtualSite to a molecule.

openforcefield/tests/test_topology.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,92 @@ def test_from_openmm_distinguish_using_stereochemistry(self):
507507
# process doesn't encode stereochemistry.
508508
raise NotImplementedError
509509

510+
def test_to_openmm_assign_unique_atom_names(self):
511+
"""
512+
Ensure that OFF topologies with no pre-existing atom names have unique
513+
atom names applied when being converted to openmm
514+
"""
515+
# Create OpenFF topology with 1 ethanol and 2 benzenes.
516+
ethanol = Molecule.from_smiles('CCO')
517+
benzene = Molecule.from_smiles('c1ccccc1')
518+
off_topology = Topology.from_molecules(molecules=[ethanol, benzene, benzene])
519+
omm_topology = off_topology.to_openmm()
520+
atom_names = set()
521+
for atom in omm_topology.atoms():
522+
atom_names.add(atom.name)
523+
# There should be 6 unique Cs, 6 unique Hs, and 1 unique O, for a total of 13 unique atom names
524+
assert len(atom_names) == 13
525+
526+
def test_to_openmm_assign_some_unique_atom_names(self):
527+
"""
528+
Ensure that OFF topologies with some pre-existing atom names have unique
529+
atom names applied to the other atoms when being converted to openmm
530+
"""
531+
# Create OpenFF topology with 1 ethanol and 2 benzenes.
532+
ethanol = Molecule.from_smiles('CCO')
533+
for atom in ethanol.atoms:
534+
atom.name = f'AT{atom.molecule_atom_index}'
535+
benzene = Molecule.from_smiles('c1ccccc1')
536+
off_topology = Topology.from_molecules(molecules=[ethanol, benzene, benzene])
537+
omm_topology = off_topology.to_openmm()
538+
atom_names = set()
539+
for atom in omm_topology.atoms():
540+
atom_names.add(atom.name)
541+
# There should be 9 "ATOM#"-labeled atoms, 6 unique Cs, and 6 unique Hs,
542+
# for a total of 21 unique atom names
543+
assert len(atom_names) == 21
544+
545+
def test_to_openmm_assign_unique_atom_names_some_duplicates(self):
546+
"""
547+
Ensure that OFF topologies where some molecules have invalid/duplicate
548+
atom names have unique atom names applied while the other molecules are unaffected.
549+
"""
550+
# Create OpenFF topology with 1 ethanol and 2 benzenes.
551+
ethanol = Molecule.from_smiles('CCO')
552+
553+
# Assign duplicate atom names in ethanol (two AT0s)
554+
ethanol_atom_names_with_duplicates = [f'AT{i}' for i in range(ethanol.n_atoms)]
555+
ethanol_atom_names_with_duplicates[1] = 'AT0'
556+
for atom, atom_name in zip(ethanol.atoms, ethanol_atom_names_with_duplicates):
557+
atom.name = atom_name
558+
559+
# Assign unique atom names in benzene
560+
benzene = Molecule.from_smiles('c1ccccc1')
561+
benzene_atom_names = [f'AT{i}' for i in range(benzene.n_atoms)]
562+
for atom, atom_name in zip(benzene.atoms, benzene_atom_names):
563+
atom.name = atom_name
564+
565+
off_topology = Topology.from_molecules(molecules=[ethanol, benzene, benzene])
566+
omm_topology = off_topology.to_openmm()
567+
atom_names = set()
568+
for atom in omm_topology.atoms():
569+
atom_names.add(atom.name)
570+
571+
# There should be 12 "AT#"-labeled atoms (from benzene), 2 unique Cs,
572+
# 1 unique O, and 6 unique Hs, for a total of 21 unique atom names
573+
assert len(atom_names) == 21
574+
575+
576+
def test_to_openmm_do_not_assign_unique_atom_names(self):
577+
"""
578+
Test disabling unique atom name assignment in Topology.to_openmm
579+
"""
580+
# Create OpenFF topology with 1 ethanol and 2 benzenes.
581+
ethanol = Molecule.from_smiles('CCO')
582+
for atom in ethanol.atoms:
583+
atom.name = 'eth_test'
584+
benzene = Molecule.from_smiles('c1ccccc1')
585+
benzene.atoms[0].name = 'bzn_test'
586+
off_topology = Topology.from_molecules(molecules=[ethanol, benzene, benzene])
587+
omm_topology = off_topology.to_openmm(ensure_unique_atom_names=False)
588+
atom_names = set()
589+
for atom in omm_topology.atoms():
590+
atom_names.add(atom.name)
591+
# There should be 9 atom named "eth_test", 1 atom named "bzn_test",
592+
# and 12 atoms named "", for a total of 3 unique atom names
593+
assert len(atom_names) == 3
594+
595+
510596
@pytest.mark.skipif( not(OpenEyeToolkitWrapper.is_available()), reason='Test requires OE toolkit')
511597
def test_chemical_environments_matches_OE(self):
512598
"""Test Topology.chemical_environment_matches"""

0 commit comments

Comments
 (0)