Skip to content

Commit 14c1d2f

Browse files
committed
separate smiles roundtrip tests by toolkit, consolidate toolkit stereo diffs
1 parent 56de48a commit 14c1d2f

File tree

1 file changed

+69
-34
lines changed

1 file changed

+69
-34
lines changed

openforcefield/tests/test_molecule.py

Lines changed: 69 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,26 @@ def mini_drug_bank(xfail_mols=None, wip_mols=None):
192192
# used inside pytest.mark.parametrize (see issue #349 in pytest).
193193
mini_drug_bank.molecules = None
194194

195+
# All the molecules that raise UndefinedStereochemistryError when read by OETK()
196+
openeye_drugbank_undefined_stereo_mols = {'DrugBank_1634', 'DrugBank_1700', 'DrugBank_1962',
197+
'DrugBank_2519', 'DrugBank_2987', 'DrugBank_3502',
198+
'DrugBank_3930', 'DrugBank_4161', 'DrugBank_4162',
199+
'DrugBank_5043', 'DrugBank_5418', 'DrugBank_6531'}
200+
201+
# All the molecules that raise UndefinedStereochemistryError when read by RDKit.
202+
# Note that this list is different from that for OEMol,
203+
# since the toolkits have different definitions of "stereogenic"
204+
rdkit_drugbank_undefined_stereo_mols = {'DrugBank_1634', 'DrugBank_1962', 'DrugBank_2519',
205+
'DrugBank_3930', 'DrugBank_5043', 'DrugBank_5418'}
206+
207+
208+
# Missing stereo in OE but not RDK: 'DrugBank_2987', 'DrugBank_3502', 'DrugBank_4161',
209+
# 'DrugBank_4162', 'DrugBank_6531', 'DrugBank_1700',
210+
211+
# Some molecules are _valid_ in both OETK and RDKit, but will fail if you try
212+
# to convert from one to the other, since OE adds stereo that RDKit doesn't
213+
drugbank_stereogenic_in_oe_but_not_rdkit = {'DrugBank_1598', 'DrugBank_4346', 'DrugBank_1849',
214+
'DrugBank_2141'}
195215

196216
#=============================================================================================
197217
# TESTS
@@ -260,12 +280,36 @@ def test_create_copy(self, molecule):
260280
molecule_copy = Molecule(molecule)
261281
assert molecule_copy == molecule
262282

283+
@pytest.mark.parametrize('toolkit', [OpenEyeToolkitWrapper, RDKitToolkitWrapper])
263284
@pytest.mark.parametrize('molecule', mini_drug_bank())
264-
def test_from_smiles(self, molecule):
285+
def test_to_from_smiles(self, molecule, toolkit):
265286
"""Test round-trip creation from SMILES"""
266-
smiles1 = molecule.to_smiles()
267-
molecule2 = Molecule.from_smiles(smiles1)
268-
smiles2 = molecule2.to_smiles()
287+
if not toolkit.is_available():
288+
pytest.skip('Required toolkit is unavailable')
289+
290+
if toolkit == RDKitToolkitWrapper:
291+
# Skip the test if OpenEye assigns stereochemistry but RDKit doesn't (since then, the
292+
# OFF molecule will be loaded, but fail to convert in to_rdkit)
293+
if molecule.name in drugbank_stereogenic_in_oe_but_not_rdkit:
294+
pytest.skip('Molecule is stereogenic in OpenEye (which loaded this dataset), but not RDKit, so it '
295+
'is impossible to make a valid RDMol in this test')
296+
undefined_stereo_mols = rdkit_drugbank_undefined_stereo_mols
297+
elif toolkit == OpenEyeToolkitWrapper:
298+
undefined_stereo_mols = openeye_drugbank_undefined_stereo_mols
299+
300+
toolkit_wrapper = toolkit()
301+
302+
undefined_stereo = molecule.name in undefined_stereo_mols
303+
304+
smiles1 = molecule.to_smiles(toolkit_registry=toolkit_wrapper)
305+
if undefined_stereo:
306+
molecule2 = Molecule.from_smiles(smiles1,
307+
allow_undefined_stereo=True,
308+
toolkit_registry=toolkit_wrapper)
309+
else:
310+
molecule2 = Molecule.from_smiles(smiles1,
311+
toolkit_registry=toolkit_wrapper)
312+
smiles2 = molecule2.to_smiles(toolkit_registry=toolkit_wrapper)
269313
assert (smiles1 == smiles2)
270314

271315
# TODO: Should there be an equivalent toolkit test and leave this as an integration test?
@@ -318,13 +362,7 @@ def test_to_from_rdkit(self, molecule):
318362
# import pickle
319363
from openforcefield.utils.toolkits import UndefinedStereochemistryError
320364

321-
# DrugBank test set known failures. Note that this list is different from that for OEMol,
322-
# since the toolkits have different definitions of "stereogenic"
323-
# Stereogenic in OE but not RDK: 'DrugBank_2987', 'DrugBank_3502', 'DrugBank_4161',
324-
# 'DrugBank_4162', 'DrugBank_6531', 'DrugBank_1700',
325-
undefined_stereo_mols = {'DrugBank_1634', 'DrugBank_1962', 'DrugBank_2519',
326-
'DrugBank_3930', 'DrugBank_5043', 'DrugBank_5418'}
327-
undefined_stereo = molecule.name in undefined_stereo_mols
365+
undefined_stereo = molecule.name in rdkit_drugbank_undefined_stereo_mols
328366

329367
toolkit_wrapper = RDKitToolkitWrapper()
330368

@@ -393,22 +431,24 @@ def test_to_from_iupac(self, molecule):
393431
from openforcefield.utils.toolkits import UndefinedStereochemistryError
394432

395433
# All the molecules that raise UndefinedStereochemistryError in Molecule.from_iupac()
396-
undefined_stereo_mols = {'DrugBank_977', 'DrugBank_1634', 'DrugBank_1700', 'DrugBank_1962',
397-
'DrugBank_2148', 'DrugBank_2178', 'DrugBank_2186', 'DrugBank_2208',
398-
'DrugBank_2519', 'DrugBank_2538', 'DrugBank_2592', 'DrugBank_2651',
399-
'DrugBank_2987', 'DrugBank_3332', 'DrugBank_3502', 'DrugBank_3622',
400-
'DrugBank_3726', 'DrugBank_3844', 'DrugBank_3930', 'DrugBank_4161',
401-
'DrugBank_4162', 'DrugBank_4778', 'DrugBank_4593', 'DrugBank_4959',
402-
'DrugBank_5043', 'DrugBank_5076', 'DrugBank_5176', 'DrugBank_5418',
403-
'DrugBank_5737', 'DrugBank_5902', 'DrugBank_6304', 'DrugBank_6305',
404-
'DrugBank_6329', 'DrugBank_6355', 'DrugBank_6401', 'DrugBank_6509',
405-
'DrugBank_6531', 'DrugBank_6647',
406-
407-
# These test cases are allowed to fail.
408-
'DrugBank_390', 'DrugBank_810', 'DrugBank_4316', 'DrugBank_4346',
409-
'DrugBank_7124'
410-
}
411-
undefined_stereo = molecule.name in undefined_stereo_mols
434+
# (This list is larger than the usual set of undefined-stereo molecules; this probably has
435+
# something to do with the information content of IUPAC names.)
436+
iupac_problem_mols = {'DrugBank_977', 'DrugBank_1634', 'DrugBank_1700', 'DrugBank_1962',
437+
'DrugBank_2148', 'DrugBank_2178', 'DrugBank_2186', 'DrugBank_2208',
438+
'DrugBank_2519', 'DrugBank_2538', 'DrugBank_2592', 'DrugBank_2651',
439+
'DrugBank_2987', 'DrugBank_3332', 'DrugBank_3502', 'DrugBank_3622',
440+
'DrugBank_3726', 'DrugBank_3844', 'DrugBank_3930', 'DrugBank_4161',
441+
'DrugBank_4162', 'DrugBank_4778', 'DrugBank_4593', 'DrugBank_4959',
442+
'DrugBank_5043', 'DrugBank_5076', 'DrugBank_5176', 'DrugBank_5418',
443+
'DrugBank_5737', 'DrugBank_5902', 'DrugBank_6304', 'DrugBank_6305',
444+
'DrugBank_6329', 'DrugBank_6355', 'DrugBank_6401', 'DrugBank_6509',
445+
'DrugBank_6531', 'DrugBank_6647',
446+
447+
# These test cases are allowed to fail.
448+
'DrugBank_390', 'DrugBank_810', 'DrugBank_4316', 'DrugBank_4346',
449+
'DrugBank_7124'
450+
}
451+
undefined_stereo = molecule.name in iupac_problem_mols
412452

413453
iupac = molecule.to_iupac()
414454

@@ -431,7 +471,7 @@ def test_to_from_topology(self, molecule):
431471
@pytest.mark.parametrize('format', [
432472
'mol2',
433473
'sdf',
434-
pytest.param('pdb', marks=pytest.mark.wip(reason='Read from pdb has not bee implemented properly yet'))
474+
pytest.param('pdb', marks=pytest.mark.wip(reason='Read from pdb has not been implemented properly yet'))
435475
])
436476
def test_to_from_file(self, molecule, format):
437477
"""Test that conversion/creation of a molecule to and from a file is consistent."""
@@ -471,12 +511,7 @@ def test_to_from_oemol(self, molecule):
471511
# known_failures = {'ZINC05964684', 'ZINC05885163', 'ZINC05543156', 'ZINC17211981',
472512
# 'ZINC17312986', 'ZINC06424847', 'ZINC04963126'}
473513

474-
# DrugBank test set known failures.
475-
undefined_stereo_mols = {'DrugBank_1634', 'DrugBank_1700', 'DrugBank_1962',
476-
'DrugBank_2519', 'DrugBank_2987', 'DrugBank_3502',
477-
'DrugBank_3930', 'DrugBank_4161', 'DrugBank_4162',
478-
'DrugBank_5043', 'DrugBank_5418', 'DrugBank_6531'}
479-
undefined_stereo = molecule.name in undefined_stereo_mols
514+
undefined_stereo = molecule.name in openeye_drugbank_undefined_stereo_mols
480515

481516
toolkit_wrapper = OpenEyeToolkitWrapper()
482517

0 commit comments

Comments
 (0)