openforcefield · j-wags · Feb 4, 2020 · Nov 30, 2019 · Nov 30, 2019 · Nov 30, 2019
diff --git a/docs/releasehistory.rst b/docs/releasehistory.rst
@@ -7,6 +7,38 @@ Releases follow the ``major.minor.micro`` scheme recommended by `PEP440 <https:/
 * ``minor`` increments add features but do not break API compatibility
 * ``micro`` increments represent bugfix releases or improvements in documentation
 
+0.6.1 - Bugfixes
+----------------
+
+New features
+""""""""""""
+- `PR #469 <https://github.com/openforcefield/openforcefield/pull/469>`_:
+  The :py:class:`Molecule <openforcefield.topology.Molecule>` adds
+  :py:meth:`Molecule.has_unique_atom_names <openforcefield.topology.Molecule.has_unique_atom_names>`
+  and :py:meth:`Molecule.has_unique_atom_names <openforcefield.topology.Molecule.generate_unique_atom_names>`.
+
+Behavior changed
+""""""""""""""""
+- `PR #469 <https://github.com/openforcefield/openforcefield/pull/469>`_:
+  When running :py:meth:`Topology.to_openmm <openforcefield.topology.Topology.to_openmm>`, unique atom names
+  are generated (overriding any existing atom names) if the provided atom names are not unique. This
+  uniqueness extends only to atoms in the same molecule. To disable this behavior, set the kwarg
+  ``ensure_unique_atom_names=False``.
+
+Tests added
+"""""""""""
+- `PR #469 <https://github.com/openforcefield/openforcefield/pull/469>`_: Added round-trip SMILES test
+  to add coverage for :py:meth:`Molecule.from_smiles <openforcefield.topology.Molecule.from_smiles>`.
+- `PR #469 <https://github.com/openforcefield/openforcefield/pull/469>`_: Added tests for unique atom
+  naming behavior in  :py:meth:`Topology.to_openmm <openforcefield.topology.Topology.to_openmm>`, as
+  well as tests of the ``ensure_unique_atom_names=False`` kwarg disabling this behavior.
+
+Bugfixes
+""""""""
+- `Issue #460 <https://github.com/openforcefield/openforcefield/issues/460>`_: Creates unique atom
+  names in :py:meth:`Topology.to_openmm <openforcefield.topology.Topology.to_openmm>` if the existing
+  ones are not unique. The lack of unique atom names caused problems in workflows involving
+  downstream tools that expect unique atom names.
 
 0.6.0 - Library Charges
 -----------------------
@@ -90,10 +122,10 @@ Behavior changed
 
 Tests added
 """""""""""
-- `PR #430 <https://github.com/openforcefield/openforcefield/pull/430>`_: Added test for 
-  Wiberg Bond Order implemented in OpenEye Toolkits. Molecules taken from 
+- `PR #430 <https://github.com/openforcefield/openforcefield/pull/430>`_: Added test for
+  Wiberg Bond Order implemented in OpenEye Toolkits. Molecules taken from
   DOI:10.5281/zenodo.3405489 . Added by Sukanya Sasmal.
- 
+
 
 Bugfixes
 """"""""
@@ -474,7 +506,7 @@ API-breaking Changes
 * `PR #291 <https://github.com/openforcefield/openforcefield/pull/291>`_: Remove ``ForceField.load/to_smirnoff_data``, add ``ForceField.to_file/string`` and ``ParameterHandler.add_parameters``. Change behavior of ``ForceField.register_X_handler`` functions.
 
 Bugfixes
-"""""""" 
+""""""""
 * `PR #327 <https://github.com/openforcefield/openforcefield/pull/327>`_: Fix units in tip3p.offxml (note that this file is still not loadable by current toolkit)
 * `PR #325 <https://github.com/openforcefield/openforcefield/pull/325>`_: Fix solvent box for provided test system to resolve periodic clashes.
 * `PR #325 <https://github.com/openforcefield/openforcefield/pull/325>`_: Add informative message containing Hill formula when a molecule can't be matched in ``Topology.from_openmm``.

diff --git a/openforcefield/tests/test_forcefield.py b/openforcefield/tests/test_forcefield.py
@@ -746,7 +746,7 @@ def test_parameterize_ethanol_missing_torsion(self):
         topology = Topology.from_openmm(pdbfile.topology, unique_molecules=molecules)
         with pytest.raises(UnassignedProperTorsionParameterException,
                            match='- Topology indices [(]5, 0, 1, 6[)]: '
-                                 'names and elements [(] H[)], [(] C[)], [(] C[)], [(] H[)],') \
+                                 'names and elements [(](H\d+)? H[)], [(](C\d+)? C[)], [(](C\d+)? C[)], [(](H\d+)? H[)],') \
                 as excinfo:
             omm_system = forcefield.create_openmm_system(topology)
 

diff --git a/openforcefield/tests/test_molecule.py b/openforcefield/tests/test_molecule.py
@@ -192,6 +192,26 @@ def mini_drug_bank(xfail_mols=None, wip_mols=None):
 # used inside pytest.mark.parametrize (see issue #349 in pytest).
 mini_drug_bank.molecules = None
 
+# All the molecules that raise UndefinedStereochemistryError when read by OETK()
+openeye_drugbank_undefined_stereo_mols = {'DrugBank_1634', 'DrugBank_1700', 'DrugBank_1962',
+                                          'DrugBank_2519', 'DrugBank_2987', 'DrugBank_3502',
+                                          'DrugBank_3930', 'DrugBank_4161', 'DrugBank_4162',
+                                          'DrugBank_5043', 'DrugBank_5418', 'DrugBank_6531'}
+
+# All the molecules that raise UndefinedStereochemistryError when read by OETK().
+# Note that this list is different from that for OEMol,
+# since the toolkits have different definitions of "stereogenic"
+rdkit_drugbank_undefined_stereo_mols = {'DrugBank_1634', 'DrugBank_1962', 'DrugBank_2519',
+                                        'DrugBank_3930', 'DrugBank_5043', 'DrugBank_5418'}
+
+
+# Missing stereo in OE but not RDK:  'DrugBank_2987', 'DrugBank_3502', 'DrugBank_4161',
+# 'DrugBank_4162', 'DrugBank_6531', 'DrugBank_1700',
+
+# Some molecules are _valid_ in both OETK and RDKit, but will fail if you try
+# to convert from one to the other, since OE adds stereo that RDKit doesn't
+drugbank_stereogenic_in_oe_but_not_rdkit = {'DrugBank_1598', 'DrugBank_4346', 'DrugBank_1849',
+                                            'DrugBank_2141'}
 
 #=============================================================================================
 # TESTS
@@ -260,6 +280,55 @@ def test_create_copy(self, molecule):
         molecule_copy = Molecule(molecule)
         assert molecule_copy == molecule
 
+    @pytest.mark.parametrize('toolkit', [OpenEyeToolkitWrapper, RDKitToolkitWrapper])
+    @pytest.mark.parametrize('molecule', mini_drug_bank())
+    def test_to_from_smiles(self, molecule, toolkit):
+        """Test round-trip creation from SMILES"""
+        if not toolkit.is_available():
+            pytest.skip('Required toolkit is unavailable')
+
+        if toolkit == RDKitToolkitWrapper:
+            # Skip the test if OpenEye assigns stereochemistry but RDKit doesn't (since then, the
+            # OFF molecule will be loaded, but fail to convert in to_rdkit)
+            if molecule.name in drugbank_stereogenic_in_oe_but_not_rdkit:
+                pytest.skip('Molecle is stereogenic in OpenEye (which loaded this dataset), but not RDKit, so it '
+                            'is impossible to make a valid RDMol in this test')
+            undefined_stereo_mols = rdkit_drugbank_undefined_stereo_mols
+        elif toolkit == OpenEyeToolkitWrapper:
+            undefined_stereo_mols = openeye_drugbank_undefined_stereo_mols
+
+        toolkit_wrapper = toolkit()
+
+        undefined_stereo = molecule.name in undefined_stereo_mols
+
+        smiles1 = molecule.to_smiles(toolkit_registry=toolkit_wrapper)
+        if undefined_stereo:
+            molecule2 = Molecule.from_smiles(smiles1,
+                                             allow_undefined_stereo=True,
+                                             toolkit_registry=toolkit_wrapper)
+        else:
+            molecule2 = Molecule.from_smiles(smiles1,
+                                             toolkit_registry=toolkit_wrapper)
+        smiles2 = molecule2.to_smiles(toolkit_registry=toolkit_wrapper)
+        assert (smiles1 == smiles2)
+
+    @pytest.mark.parametrize('molecule', mini_drug_bank())
+    def test_unique_atom_names(self, molecule):
+        """Test molecules have unique atom names"""
+        # The dataset we load in has atom names, so let's strip them first
+        # to ensure that we can fail the uniqueness check
+        for atom in molecule.atoms:
+            atom.name = ''
+        assert not(molecule.has_unique_atom_names)
+        # Then genreate unique atom names using the built in algorithm
+        molecule.generate_unique_atom_names()
+        # Check that the molecule has unique atom names
+        assert molecule.has_unique_atom_names
+        # Check molecule.has_unique_atom_names is working correctly
+        assert ((len(set([atom.name for atom in molecule.atoms])) == molecule.n_atoms) == molecule.has_unique_atom_names)
+        molecule.atoms[1].name = molecule.atoms[0].name # no longer unique
+        assert ((len(set([atom.name for atom in molecule.atoms])) == molecule.n_atoms) == molecule.has_unique_atom_names)
+
     # TODO: Should there be an equivalent toolkit test and leave this as an integration test?
     @pytest.mark.slow
     def test_create_from_file(self):
@@ -310,13 +379,7 @@ def test_to_from_rdkit(self, molecule):
         # import pickle
         from openforcefield.utils.toolkits import UndefinedStereochemistryError
 
-        # DrugBank test set known failures. Note that this list is different from that for OEMol,
-        # since the toolkits have different definitions of "stereogenic"
-        # Stereogenic in OE but not RDK:  'DrugBank_2987', 'DrugBank_3502', 'DrugBank_4161',
-        # 'DrugBank_4162', 'DrugBank_6531', 'DrugBank_1700',
-        undefined_stereo_mols = {'DrugBank_1634', 'DrugBank_1962', 'DrugBank_2519',
-                                 'DrugBank_3930', 'DrugBank_5043', 'DrugBank_5418'}
-        undefined_stereo = molecule.name in undefined_stereo_mols
+        undefined_stereo = molecule.name in rdkit_drugbank_undefined_stereo_mols
 
         toolkit_wrapper = RDKitToolkitWrapper()
 
@@ -385,22 +448,24 @@ def test_to_from_iupac(self, molecule):
         from openforcefield.utils.toolkits import UndefinedStereochemistryError
 
         # All the molecules that raise UndefinedStereochemistryError in Molecule.from_iupac()
-        undefined_stereo_mols = {'DrugBank_977', 'DrugBank_1634', 'DrugBank_1700', 'DrugBank_1962',
-                                 'DrugBank_2148', 'DrugBank_2178', 'DrugBank_2186', 'DrugBank_2208',
-                                 'DrugBank_2519', 'DrugBank_2538', 'DrugBank_2592', 'DrugBank_2651',
-                                 'DrugBank_2987', 'DrugBank_3332', 'DrugBank_3502', 'DrugBank_3622',
-                                 'DrugBank_3726', 'DrugBank_3844', 'DrugBank_3930', 'DrugBank_4161',
-                                 'DrugBank_4162', 'DrugBank_4778', 'DrugBank_4593', 'DrugBank_4959',
-                                 'DrugBank_5043', 'DrugBank_5076', 'DrugBank_5176', 'DrugBank_5418',
-                                 'DrugBank_5737', 'DrugBank_5902', 'DrugBank_6304', 'DrugBank_6305',
-                                 'DrugBank_6329', 'DrugBank_6355', 'DrugBank_6401', 'DrugBank_6509',
-                                 'DrugBank_6531', 'DrugBank_6647',
-
-                                 # These test cases are allowed to fail.
-                                 'DrugBank_390', 'DrugBank_810', 'DrugBank_4316', 'DrugBank_4346',
-                                 'DrugBank_7124'
-                                 }
-        undefined_stereo = molecule.name in undefined_stereo_mols
+        # (This is a larger list than the normal group of undefined stereo mols, probably has
+        # something to do with IUPAC information content)
+        iupac_problem_mols = {'DrugBank_977', 'DrugBank_1634', 'DrugBank_1700', 'DrugBank_1962',
+                              'DrugBank_2148', 'DrugBank_2178', 'DrugBank_2186', 'DrugBank_2208',
+                              'DrugBank_2519', 'DrugBank_2538', 'DrugBank_2592', 'DrugBank_2651',
+                              'DrugBank_2987', 'DrugBank_3332', 'DrugBank_3502', 'DrugBank_3622',
+                              'DrugBank_3726', 'DrugBank_3844', 'DrugBank_3930', 'DrugBank_4161',
+                              'DrugBank_4162', 'DrugBank_4778', 'DrugBank_4593', 'DrugBank_4959',
+                              'DrugBank_5043', 'DrugBank_5076', 'DrugBank_5176', 'DrugBank_5418',
+                              'DrugBank_5737', 'DrugBank_5902', 'DrugBank_6304', 'DrugBank_6305',
+                              'DrugBank_6329', 'DrugBank_6355', 'DrugBank_6401', 'DrugBank_6509',
+                              'DrugBank_6531', 'DrugBank_6647',
+
+                              # These test cases are allowed to fail.
+                              'DrugBank_390', 'DrugBank_810', 'DrugBank_4316', 'DrugBank_4346',
+                              'DrugBank_7124'
+                              }
+        undefined_stereo = molecule.name in iupac_problem_mols
 
         iupac = molecule.to_iupac()
 
@@ -423,7 +488,7 @@ def test_to_from_topology(self, molecule):
     @pytest.mark.parametrize('format', [
         'mol2',
         'sdf',
-        pytest.param('pdb', marks=pytest.mark.wip(reason='Read from pdb has not bee implemented properly yet'))
+        pytest.param('pdb', marks=pytest.mark.wip(reason='Read from pdb has not been implemented properly yet'))
     ])
     def test_to_from_file(self, molecule, format):
         """Test that conversion/creation of a molecule to and from a file is consistent."""
@@ -463,12 +528,7 @@ def test_to_from_oemol(self, molecule):
         # known_failures = {'ZINC05964684', 'ZINC05885163', 'ZINC05543156', 'ZINC17211981',
         #                   'ZINC17312986', 'ZINC06424847', 'ZINC04963126'}
 
-        # DrugBank test set known failures.
-        undefined_stereo_mols = {'DrugBank_1634', 'DrugBank_1700', 'DrugBank_1962',
-                                 'DrugBank_2519', 'DrugBank_2987', 'DrugBank_3502',
-                                 'DrugBank_3930', 'DrugBank_4161', 'DrugBank_4162',
-                                 'DrugBank_5043', 'DrugBank_5418', 'DrugBank_6531'}
-        undefined_stereo = molecule.name in undefined_stereo_mols
+        undefined_stereo = molecule.name in openeye_drugbank_undefined_stereo_mols
 
         toolkit_wrapper = OpenEyeToolkitWrapper()
 
@@ -730,9 +790,9 @@ def test_add_virtual_site_units(self, molecule):
         """
         Tests the unit type checking of the VirtualSite base class
         """
-        
+
         # TODO: Should these be using BondChargeVirtualSite, or should we just call the base class (which does the unit checks) directly?
-        
+
         # Prepare values for unit checks
         distance_unitless = 0.4
         sigma_unitless = 0.1
@@ -786,7 +846,7 @@ def test_add_virtual_site_units(self, molecule):
             molecule.add_bond_charge_virtual_site([atom1, atom2, atom3], distance, charge_increments=charge_increments)
 
         vsite3_index = molecule.add_bond_charge_virtual_site([atom1, atom2, atom3, atom4], distance, charge_increments=charge_increments)
-            
+
     @pytest.mark.parametrize('molecule', mini_drug_bank())
     def test_add_bond_charge_virtual_site(self, molecule):
         """Test the addition of a BondChargeVirtualSite to a molecule.

diff --git a/openforcefield/tests/test_topology.py b/openforcefield/tests/test_topology.py
@@ -507,6 +507,92 @@ def test_from_openmm_distinguish_using_stereochemistry(self):
         # process doesn't encode stereochemistry.
         raise NotImplementedError
 
+    def test_to_openmm_assign_unique_atom_names(self):
+        """
+        Ensure that OFF topologies with no pre-existing atom names have unique
+        atom names applied when being converted to openmm
+        """
+        # Create OpenFF topology with 1 ethanol and 2 benzenes.
+        ethanol = Molecule.from_smiles('CCO')
+        benzene = Molecule.from_smiles('c1ccccc1')
+        off_topology = Topology.from_molecules(molecules=[ethanol, benzene, benzene])
+        omm_topology = off_topology.to_openmm()
+        atom_names = set()
+        for atom in omm_topology.atoms():
+            atom_names.add(atom.name)
+        # There should be 6 unique Cs, 6 unique Hs, and 1 unique O, for a total of 13 unique atom names
+        assert len(atom_names) == 13
+
+    def test_to_openmm_assign_some_unique_atom_names(self):
+        """
+        Ensure that OFF topologies with some pre-existing atom names have unique
+        atom names applied to the other atoms when being converted to openmm
+        """
+        # Create OpenFF topology with 1 ethanol and 2 benzenes.
+        ethanol = Molecule.from_smiles('CCO')
+        for atom in ethanol.atoms:
+            atom.name = f'AT{atom.molecule_atom_index}'
+        benzene = Molecule.from_smiles('c1ccccc1')
+        off_topology = Topology.from_molecules(molecules=[ethanol, benzene, benzene])
+        omm_topology = off_topology.to_openmm()
+        atom_names = set()
+        for atom in omm_topology.atoms():
+            atom_names.add(atom.name)
+        # There should be 9 "ATOM#"-labeled atoms, 6 unique Cs, and 6 unique Hs,
+        # for a total of 21 unique atom names
+        assert len(atom_names) == 21
+
+    def test_to_openmm_assign_unique_atom_names_some_duplicates(self):
+        """
+        Ensure that OFF topologies where some molecules have invalid/duplicate
+        atom names have unique atom names applied while the other molecules are unaffected.
+        """
+        # Create OpenFF topology with 1 ethanol and 2 benzenes.
+        ethanol = Molecule.from_smiles('CCO')
+
+        # Assign duplicate atom names in ethanol (two AT0s)
+        ethanol_atom_names_with_duplicates = [f'AT{i}' for i in range(ethanol.n_atoms)]
+        ethanol_atom_names_with_duplicates[1] = 'AT0'
+        for atom, atom_name in zip(ethanol.atoms, ethanol_atom_names_with_duplicates):
+            atom.name = atom_name
+
+        # Assign unique atom names in benzene
+        benzene = Molecule.from_smiles('c1ccccc1')
+        benzene_atom_names = [f'AT{i}' for i in range(benzene.n_atoms)]
+        for atom, atom_name in zip(benzene.atoms, benzene_atom_names):
+            atom.name = atom_name
+
+        off_topology = Topology.from_molecules(molecules=[ethanol, benzene, benzene])
+        omm_topology = off_topology.to_openmm()
+        atom_names = set()
+        for atom in omm_topology.atoms():
+            atom_names.add(atom.name)
+
+        # There should be  12 "AT#"-labeled atoms (from benzene), 2 unique Cs,
+        # 1 unique O, and 6 unique Hs, for a total of 21 unique atom names
+        assert len(atom_names) == 21
+
+
+    def test_to_openmm_do_not_assign_unique_atom_names(self):
+        """
+        Test disabling unique atom name assignment in Topology.to_openmm
+        """
+        # Create OpenFF topology with 1 ethanol and 2 benzenes.
+        ethanol = Molecule.from_smiles('CCO')
+        for atom in ethanol.atoms:
+            atom.name = 'eth_test'
+        benzene = Molecule.from_smiles('c1ccccc1')
+        benzene.atoms[0].name = 'bzn_test'
+        off_topology = Topology.from_molecules(molecules=[ethanol, benzene, benzene])
+        omm_topology = off_topology.to_openmm(ensure_unique_atom_names=False)
+        atom_names = set()
+        for atom in omm_topology.atoms():
+            atom_names.add(atom.name)
+        # There should be 9 atom named "eth_test", 1 atom named "bzn_test",
+        # and 12 atoms named "", for a total of 3 unique atom names
+        assert len(atom_names) == 3
+
+
     @pytest.mark.skipif( not(OpenEyeToolkitWrapper.is_available()), reason='Test requires OE toolkit')
     def test_chemical_environments_matches_OE(self):
         """Test Topology.chemical_environment_matches"""