Skip to content

Commit 73cc43c

Browse files
author
joshhorton
committed
added tests for hill formula, is_isomorphic_with and are_isomorphic, and remaping tests. Also tweaked the toolkits to get the atom mapping if found and attach it to the moelecule properties dict.
1 parent 50f06c5 commit 73cc43c

File tree

3 files changed

+183
-59
lines changed

3 files changed

+183
-59
lines changed

openforcefield/tests/test_molecule.py

Lines changed: 105 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def assert_molecule_is_equal(molecule1, molecule2, msg):
5858
Message to include if molecules fail to match.
5959
6060
"""
61-
if not(molecule1.is_isomorphic(molecule2)):
61+
if not(molecule1.is_isomorphic_with(molecule2)):
6262
raise AssertionError(msg)
6363

6464

@@ -409,7 +409,7 @@ def test_to_from_iupac(self, molecule):
409409
Molecule.from_iupac(iupac)
410410

411411
molecule_copy = Molecule.from_iupac(iupac, allow_undefined_stereo=undefined_stereo)
412-
assert molecule.is_isomorphic(molecule_copy, compare_atom_stereochemistry=not undefined_stereo)
412+
assert molecule.is_isomorphic_with(molecule_copy)
413413

414414
@pytest.mark.parametrize('molecule', mini_drug_bank())
415415
def test_to_from_topology(self, molecule):
@@ -529,6 +529,109 @@ def test_name(self):
529529
molecule.name = name
530530
assert molecule.name == name
531531

532+
def test_hill_formula(self):
533+
"""Test that making the hill formula is consistent between input methods and ordering"""
534+
# make sure smiles match reference
535+
molecule_smiles = Molecule.from_smiles('CCO')
536+
assert molecule_smiles.hill_formula == 'C2H6O1'
537+
# make sure is not order dependent
538+
molecule_smiles_reverse = Molecule.from_smiles('OCC')
539+
assert molecule_smiles.hill_formula == molecule_smiles_reverse.hill_formula
540+
# make sure files and smiles match
541+
molecule_file = Molecule.from_file(get_data_file_path('molecules/ethanol.mol2'))
542+
assert molecule_smiles.hill_formula == molecule_file.hill_formula
543+
# make sure the topology molecule gives the same formula
544+
from openforcefield.topology.topology import TopologyMolecule, Topology
545+
topology = Topology.from_molecules(molecule_smiles)
546+
topmol = TopologyMolecule(molecule_smiles, topology)
547+
assert molecule_smiles.hill_formula == Molecule.to_hill_formula(topmol)
548+
# make sure the networkx matches
549+
assert molecule_smiles.hill_formula == Molecule.to_hill_formula(molecule_smiles.to_networkx())
550+
551+
def test_isomorphic(self):
552+
"""Test the different levels of isomorphic matching"""
553+
# check that hill formula fails are caught
554+
ethanol = Molecule.from_smiles('CCO')
555+
acetaldehyde = Molecule.from_smiles('CC=O')
556+
assert ethanol.is_isomorphic_with(acetaldehyde) is False
557+
assert acetaldehyde.is_isomorphic_with(ethanol) is False
558+
# check that different orderings work with full matching
559+
ethanol_reverse = Molecule.from_smiles('OCC')
560+
assert ethanol.is_isomorphic_with(ethanol_reverse) is True
561+
# check a reference mapping between ethanol and ethanol_reverse matches that calculated
562+
ref_mapping = {0: 2, 1: 1, 2: 0, 3: 6, 4: 7, 5: 8, 6: 4, 7: 5, 8: 3}
563+
assert Molecule.are_isomorphic(ethanol, ethanol_reverse, return_atom_map=True)[1] == ref_mapping
564+
# check matching with nx.Graph atomic numbers and connectivity only
565+
assert Molecule.are_isomorphic(ethanol, ethanol_reverse.to_networkx(), aromatic_matching=False,
566+
formal_charge_matching=False, bond_order_matching=False,
567+
atom_stereochemistry_matching=False,
568+
bond_stereochemistry_matching=False)[0] is True
569+
# check matching with nx.Graph with full matching
570+
assert ethanol.is_isomorphic_with(ethanol_reverse.to_networkx()) is True
571+
# check matching with a TopologyMolecule class
572+
from openforcefield.topology.topology import TopologyMolecule, Topology
573+
topology = Topology.from_molecules(ethanol)
574+
topmol = TopologyMolecule(ethanol, topology)
575+
assert Molecule.are_isomorphic(ethanol, topmol, aromatic_matching=False, formal_charge_matching=False,
576+
bond_order_matching=False, atom_stereochemistry_matching=False,
577+
bond_stereochemistry_matching=False)[0] is True
578+
# test hill formula passes but isomorphic fails
579+
mol1 = Molecule.from_smiles('Fc1ccc(F)cc1')
580+
mol2 = Molecule.from_smiles('Fc1ccccc1F')
581+
assert mol1.is_isomorphic_with(mol2) is False
582+
assert mol2.is_isomorphic_with(mol1) is False
583+
584+
def test_remap(self):
585+
"""Test the remap function which should return a new molecule in the requested ordering"""
586+
# the order here is CCO
587+
ethanol = Molecule.from_file(get_data_file_path('molecules/ethanol.mol2'))
588+
# get ethanol in reverse order OCC
589+
ethanol_reverse = Molecule.from_smiles('OCC')
590+
# get the mapping between the molecules
591+
mapping = Molecule.are_isomorphic(ethanol, ethanol_reverse, True)[1]
592+
ethanol.add_bond_charge_virtual_site([0, 1], 0.3 * unit.angstrom)
593+
# make sure that molecules with virtual sites raises an error
594+
with pytest.raises(Exception):
595+
remapped = ethanol.remap(mapping, current_to_new=True)
596+
597+
# remake with no virtual site and remap to match the reversed ordering
598+
ethanol = Molecule.from_file(get_data_file_path('molecules/ethanol.mol2'))
599+
600+
new_ethanol = ethanol.remap(mapping, current_to_new=True)
601+
602+
def assert_molecules_match_after_remap(mol1, mol2):
603+
"""Check all of the attributes in a molecule match after being remapped"""
604+
for atoms in zip(mol1.atoms, mol2.atoms):
605+
assert atoms[0].to_dict() == atoms[1].to_dict()
606+
# bonds will not be in the same order in the molecule and the atom1 and atom2 indecies could be out of order
607+
remaped_bonds = [bond.to_dict() for bond in mol2.bonds]
608+
for bond in mol1.bonds:
609+
assert bond.to_dict() in remaped_bonds
610+
assert mol1.n_bonds == mol2.n_bonds
611+
assert mol1.n_angles == mol2.n_angles
612+
assert mol1.n_propers == mol2.n_propers
613+
assert mol1.n_impropers == mol2.n_impropers
614+
assert mol1.total_charge == mol2.total_charge
615+
616+
# check all of the properties match as well, torsions and impropers will be in a different order
617+
# due to the bonds being out of order
618+
assert_molecules_match_after_remap(new_ethanol, ethanol_reverse)
619+
620+
# catch mappings that are the wrong size
621+
too_small_mapping = {0: 1}
622+
with pytest.raises(Exception):
623+
new_ethanol = ethanol.remap(too_small_mapping, current_to_new=True)
624+
625+
wrong_index_mapping = {(i + 10, new_id) for i, new_id in enumerate(mapping.values())}
626+
with pytest.raises(Exception):
627+
new_ethanol = ethanol.remap(wrong_index_mapping, current_to_new=True)
628+
629+
# test round trip (double remapping a molecule)
630+
new_ethanol = ethanol.remap(mapping, current_to_new=True)
631+
round_trip_mapping = Molecule.are_isomorphic(new_ethanol, ethanol, return_atom_map=True)[1]
632+
round_trip_ethanol = new_ethanol.remap(round_trip_mapping, current_to_new=True)
633+
assert_molecules_match_after_remap(round_trip_ethanol, ethanol)
634+
532635
@pytest.mark.parametrize('molecule', mini_drug_bank())
533636
def test_n_particles(self, molecule):
534637
"""Test n_particles property"""

openforcefield/topology/molecule.py

Lines changed: 62 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1789,7 +1789,7 @@ def __eq__(self, other):
17891789
No effort is made to ensure that the atoms are in the same order or that any annotated properties are preserved.
17901790
17911791
"""
1792-
return FrozenMolecule.are_isomorphic(self, other, return_atom_map=False)
1792+
return FrozenMolecule.are_isomorphic(self, other, return_atom_map=False)[0]
17931793

17941794
def to_smiles(self, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY):
17951795
"""
@@ -1900,7 +1900,6 @@ def are_isomorphic(
19001900
bond_stereochemistry_matching=True,
19011901
):
19021902
"""
1903-
#TODO write tests for imorphic checks and mapping.
19041903
Determines whether the two molecules are isomorphic by comparing their graphs.
19051904
19061905
Parameters
@@ -1932,8 +1931,6 @@ def are_isomorphic(
19321931
# Do a quick hill formula check first
19331932
if FrozenMolecule.to_hill_formula(mol1) != FrozenMolecule.to_hill_formula(mol2):
19341933
return False, None
1935-
else:
1936-
print(FrozenMolecule.to_hill_formula(mol1), FrozenMolecule.to_hill_formula(mol2))
19371934

19381935
# Build the user defined matching functions
19391936
def node_match_func(x, y):
@@ -1976,10 +1973,15 @@ def find_data_type(data):
19761973
"""Find the data type and return the networkx graph"""
19771974

19781975
try:
1976+
# Molecule class instance
19791977
return data.to_networkx()
19801978
except AttributeError:
1981-
if isinstance(data, nx.Graph):
1982-
return data
1979+
try:
1980+
# TopologyMolecule class instance
1981+
return data.reference_molecule.to_networkx()
1982+
except AttributeError:
1983+
if isinstance(data, nx.Graph):
1984+
return data
19831985

19841986
mol1_netx = find_data_type(mol1)
19851987
mol2_netx = find_data_type(mol2)
@@ -2033,7 +2035,7 @@ def is_isomorphic_with(self, other):
20332035

20342036
# what level of matching do we want here?
20352037
# should we expose some options as well?
2036-
return FrozenMolecule.are_isomorphic(self, other, return_atom_map=False)
2038+
return FrozenMolecule.are_isomorphic(self, other, return_atom_map=False)[0]
20372039

20382040
def generate_conformers(self,
20392041
toolkit_registry=GLOBAL_TOOLKIT_REGISTRY,
@@ -3374,44 +3376,37 @@ def from_mapped_smiles(cls, mapped_smiles):
33743376
:return: openforcefield.topology.molecule.Molecule
33753377
"""
33763378

3377-
# extract the atomic mapping
3378-
mapping = []
3379-
for atom in mapped_smiles.split(':'):
3380-
try:
3381-
mapping.append(int(atom[:2]))
3382-
except ValueError:
3383-
try:
3384-
mapping.append(int(atom[:1]))
3385-
except ValueError:
3386-
pass
3379+
# create the molecule from the smiles and check we have the right number of indexes
3380+
# in the mapped SMILES
3381+
offmol = cls.from_smiles(mapped_smiles, hydrogens_are_explicit=True)
33873382

33883383
# check we found some mapping
3389-
if len(mapping) == 0:
3384+
try:
3385+
mapping = offmol._properties['atom_map']
3386+
except KeyError:
33903387
raise SmilesParsingError('The given SMILES has no indexing, please generate a valid explicit hydrogen '
33913388
'mapped SMILES using cmiles.')
3392-
# create the molecule from the smiles and check we have the right number of indexes
3393-
# in the mapped SMILES
3394-
offmol = cls.from_smiles(mapped_smiles, hydrogens_are_explicit=True)
33953389

33963390
if len(mapping) != offmol.n_atoms:
33973391
raise SmilesParsingError('The mapped smiles does not contain enough indexes to remap the molecule.')
33983392

3399-
# Make a valid atom map dict Dict[new_index: old_index] and remap
3400-
# use -1 here as the cmiles maps from 1 not 0
3401-
atom_mapping = dict((i, index - 1) for i, index in enumerate(mapping))
3393+
# remap the molecule using the atom map found in the smiles
3394+
# the order is mapping = Dict[current_index: new_index]
34023395

3403-
return offmol.remap(atom_mapping, new_to_old=False)
3396+
return offmol.remap(mapping, new_to_current=False)
34043397

34053398
@classmethod
3406-
def from_qcarchive(cls, qca_json, client_instance=None, allow_undefined_stereo=False):
3399+
def from_qcschema(cls, qca_dict, client=None, allow_undefined_stereo=False):
34073400
"""
34083401
Create a Molecule from a QCArchive entry based on the cmiles information.
34093402
3410-
If we also have a client instance we can go and attach the starting geometry.
3403+
If we also have a client instance/address we can go and attach the starting geometry.
34113404
34123405
Parameters
34133406
----------
3414-
qca_mol : a QCArchive json representation
3407+
qca_dict : a QCArchive dict with json encoding
3408+
#TODO should this also accept the entry record instance that we can type check?
3409+
client : a qcportal.FractalClient instance or addess of an archive that we can search for the geometry.
34153410
34163411
Returns
34173412
-------
@@ -3420,10 +3415,13 @@ def from_qcarchive(cls, qca_json, client_instance=None, allow_undefined_stereo=F
34203415
34213416
Examples
34223417
--------
3418+
>>> import qcportal as ptl
3419+
>>> client = ptl.FractalClient()
3420+
>>>
34233421
34243422
"""
34253423

3426-
if 'canonical_isomeric_explicit_hydrogen_mapped_smiles' in qca_json['attributes'].keys():
3424+
if 'canonical_isomeric_explicit_hydrogen_mapped_smiles' in qca_dict['attributes'].keys():
34273425
# make a new molecule that has been reordered to match the cmiles mapping
34283426
offmol = cls.from_mapped_smiles(qca_json['attributes']['canonical_isomeric_explicit_hydrogen_mapped_smiles'])
34293427
if client_instance is not None:
@@ -3492,44 +3490,54 @@ def from_pdb(cls, file_path, smiles, allow_undefined_stereo=False):
34923490
else:
34933491
raise InvalidConformerError('The PDB and SMILES structures do not match.')
34943492

3495-
def remap(self, mapping_dict, new_to_old=True):
3493+
def remap(self, mapping_dict, current_to_new=True):
34963494
"""
34973495
Remap all of the indexes in the molecule to match the given mapping dict
3498-
:param mapping_dict: A dictionary of the mapping between in the indexes
3499-
:param new_to_old: The dict is {new_index: old_index} if True else {old_index: new_index}
3500-
:return:
3496+
:param mapping_dict: A dictionary of the mapping between in the indexes, this should start from 0.
3497+
:param current_to_new: The dict is {current_index: new_index} if True else {new_index: current_index}
3498+
:return: a new openforcefield.topology.molecule.Molecule instance with all attributes transferred
35013499
"""
35023500

35033501
if self.n_virtual_sites != 0:
35043502
raise NotImplementedError('We can not remap virtual sites yet!')
35053503

3504+
# make sure the size of the mapping matches the current molecule
3505+
if len(mapping_dict) != self.n_atoms:
3506+
raise ValueError(f'There are too many mapping indices({len(mapping_dict)}) for the amount of atoms in this '
3507+
f'molecule({self.n_atoms})')
3508+
35063509
# make two mapping dicts we need new to old for atoms
35073510
# and old to new for bonds
3508-
if new_to_old:
3509-
n_to_o = mapping_dict
3510-
o_to_n = dict((v, u) for u, v in mapping_dict.items())
3511+
if current_to_new:
3512+
cur_to_new = mapping_dict
3513+
new_to_cur = dict((v, u) for u, v in mapping_dict.items())
35113514
else:
3512-
o_to_n = mapping_dict
3513-
n_to_o = dict((v, u) for u, v in mapping_dict.items())
3515+
new_to_cur = mapping_dict
3516+
cur_to_new = dict((v, u) for u, v in mapping_dict.items())
35143517

35153518
new_molecule = Molecule()
35163519
new_molecule.name = self.name
35173520

3518-
# add the atoms list
3519-
for i in range(self.n_atoms):
3520-
# get the old atom info
3521-
old_atom = self._atoms[n_to_o[i]]
3522-
new_molecule.add_atom(atomic_number=old_atom.atomic_number,
3523-
formal_charge=old_atom.formal_charge,
3524-
is_aromatic=old_atom.is_aromatic,
3525-
stereochemistry=old_atom.stereochemistry,
3526-
name=old_atom.name)
3527-
# add the bonds
3521+
try:
3522+
# add the atoms list
3523+
for i in range(self.n_atoms):
3524+
# get the old atom info
3525+
old_atom = self._atoms[new_to_cur[i]]
3526+
new_molecule.add_atom(atomic_number=old_atom.atomic_number,
3527+
formal_charge=old_atom.formal_charge,
3528+
is_aromatic=old_atom.is_aromatic,
3529+
stereochemistry=old_atom.stereochemistry,
3530+
name=old_atom.name)
3531+
# this is the first time we access the mapping catch an index error here corresponding to mapping that starts
3532+
# from 0 or higher
3533+
except IndexError:
3534+
raise IndexError(f'The mapping supplied is missing a relation corresponding to atom({i})')
3535+
3536+
# add the bonds but with atom indexes in a sorted ascending order
35283537
for bond in self._bonds:
3529-
a1 = o_to_n[bond.atom1_index]
3530-
a2 = o_to_n[bond.atom2_index]
3531-
new_molecule.add_bond(atom1=a1,
3532-
atom2=a2,
3538+
atoms = sorted([cur_to_new[bond.atom1_index], cur_to_new[bond.atom2_index]])
3539+
new_molecule.add_bond(atom1=atoms[0],
3540+
atom2=atoms[1],
35333541
bond_order=bond.bond_order,
35343542
is_aromatic=bond.is_aromatic,
35353543
stereochemistry=bond.stereochemistry,
@@ -3538,15 +3546,15 @@ def remap(self, mapping_dict, new_to_old=True):
35383546
# remap the charges
35393547
new_charges = np.zeros(self.n_atoms)
35403548
for i in range(self.n_atoms):
3541-
new_charges[i] = self.partial_charges[n_to_o[i]].value_in_unit(unit.elementary_charge)
3549+
new_charges[i] = self.partial_charges[new_to_cur[i]].value_in_unit(unit.elementary_charge)
35423550
new_molecule.partial_charges = new_charges * unit.elementary_charge
35433551

35443552
# remap the conformers there can be more than one
35453553
if self.conformers is not None:
35463554
for conformer in self.conformers:
35473555
new_conformer = np.zeros((self.n_atoms, 3))
35483556
for i in range(self.n_atoms):
3549-
new_conformer[i] = conformer[n_to_o[i]].value_in_unit(unit.angstrom)
3557+
new_conformer[i] = conformer[new_to_cur[i]].value_in_unit(unit.angstrom)
35503558
new_molecule.add_conformer(new_conformer * unit.angstrom)
35513559

35523560
# move any properties across

0 commit comments

Comments
 (0)