@@ -192,6 +192,26 @@ def mini_drug_bank(xfail_mols=None, wip_mols=None):
192192# used inside pytest.mark.parametrize (see issue #349 in pytest).
193193mini_drug_bank .molecules = None
194194
195+ # All the molecules that raise UndefinedStereochemistryError when read by OETK()
196+ openeye_drugbank_undefined_stereo_mols = {'DrugBank_1634' , 'DrugBank_1700' , 'DrugBank_1962' ,
197+ 'DrugBank_2519' , 'DrugBank_2987' , 'DrugBank_3502' ,
198+ 'DrugBank_3930' , 'DrugBank_4161' , 'DrugBank_4162' ,
199+ 'DrugBank_5043' , 'DrugBank_5418' , 'DrugBank_6531' }
200+
201+ # All the molecules that raise UndefinedStereochemistryError when read by RDKit.
202+ # Note that this list is different from that for OEMol,
203+ # since the toolkits have different definitions of "stereogenic"
204+ rdkit_drugbank_undefined_stereo_mols = {'DrugBank_1634' , 'DrugBank_1962' , 'DrugBank_2519' ,
205+ 'DrugBank_3930' , 'DrugBank_5043' , 'DrugBank_5418' }
206+
207+
208+ # Undefined stereo in OE but not in RDKit: 'DrugBank_2987', 'DrugBank_3502', 'DrugBank_4161',
209+ # 'DrugBank_4162', 'DrugBank_6531', 'DrugBank_1700'
210+
211+ # Some molecules are _valid_ in both OETK and RDKit, but will fail if you try
212+ # to convert from one to the other, since OE adds stereo that RDKit doesn't
213+ drugbank_stereogenic_in_oe_but_not_rdkit = {'DrugBank_1598' , 'DrugBank_4346' , 'DrugBank_1849' ,
214+ 'DrugBank_2141' }
195215
196216#=============================================================================================
197217# TESTS
@@ -260,12 +280,36 @@ def test_create_copy(self, molecule):
260280 molecule_copy = Molecule (molecule )
261281 assert molecule_copy == molecule
262282
283+ @pytest .mark .parametrize ('toolkit' , [OpenEyeToolkitWrapper , RDKitToolkitWrapper ])
263284 @pytest .mark .parametrize ('molecule' , mini_drug_bank ())
264- def test_from_smiles (self , molecule ):
285+ def test_to_from_smiles (self , molecule , toolkit ):
265286 """Test round-trip creation from SMILES"""
266- smiles1 = molecule .to_smiles ()
267- molecule2 = Molecule .from_smiles (smiles1 )
268- smiles2 = molecule2 .to_smiles ()
287+ if not toolkit .is_available ():
288+ pytest .skip ('Required toolkit is unavailable' )
289+
290+ if toolkit == RDKitToolkitWrapper :
291+ # Skip the test if OpenEye assigns stereochemistry but RDKit doesn't (since then, the
292+ # OFF molecule will be loaded, but fail to convert in to_rdkit)
293+ if molecule .name in drugbank_stereogenic_in_oe_but_not_rdkit :
294+ pytest .skip ('Molecle is stereogenic in OpenEye (which loaded this dataset), but not RDKit, so it '
295+ 'is impossible to make a valid RDMol in this test' )
296+ undefined_stereo_mols = rdkit_drugbank_undefined_stereo_mols
297+ elif toolkit == OpenEyeToolkitWrapper :
298+ undefined_stereo_mols = openeye_drugbank_undefined_stereo_mols
299+
300+ toolkit_wrapper = toolkit ()
301+
302+ undefined_stereo = molecule .name in undefined_stereo_mols
303+
304+ smiles1 = molecule .to_smiles (toolkit_registry = toolkit_wrapper )
305+ if undefined_stereo :
306+ molecule2 = Molecule .from_smiles (smiles1 ,
307+ allow_undefined_stereo = True ,
308+ toolkit_registry = toolkit_wrapper )
309+ else :
310+ molecule2 = Molecule .from_smiles (smiles1 ,
311+ toolkit_registry = toolkit_wrapper )
312+ smiles2 = molecule2 .to_smiles (toolkit_registry = toolkit_wrapper )
269313 assert (smiles1 == smiles2 )
270314
271315 # TODO: Should there be an equivalent toolkit test and leave this as an integration test?
@@ -318,13 +362,7 @@ def test_to_from_rdkit(self, molecule):
318362 # import pickle
319363 from openforcefield .utils .toolkits import UndefinedStereochemistryError
320364
321- # DrugBank test set known failures. Note that this list is different from that for OEMol,
322- # since the toolkits have different definitions of "stereogenic"
323- # Stereogenic in OE but not RDK: 'DrugBank_2987', 'DrugBank_3502', 'DrugBank_4161',
324- # 'DrugBank_4162', 'DrugBank_6531', 'DrugBank_1700',
325- undefined_stereo_mols = {'DrugBank_1634' , 'DrugBank_1962' , 'DrugBank_2519' ,
326- 'DrugBank_3930' , 'DrugBank_5043' , 'DrugBank_5418' }
327- undefined_stereo = molecule .name in undefined_stereo_mols
365+ undefined_stereo = molecule .name in rdkit_drugbank_undefined_stereo_mols
328366
329367 toolkit_wrapper = RDKitToolkitWrapper ()
330368
@@ -393,22 +431,24 @@ def test_to_from_iupac(self, molecule):
393431 from openforcefield .utils .toolkits import UndefinedStereochemistryError
394432
395433 # All the molecules that raise UndefinedStereochemistryError in Molecule.from_iupac()
396- undefined_stereo_mols = {'DrugBank_977' , 'DrugBank_1634' , 'DrugBank_1700' , 'DrugBank_1962' ,
397- 'DrugBank_2148' , 'DrugBank_2178' , 'DrugBank_2186' , 'DrugBank_2208' ,
398- 'DrugBank_2519' , 'DrugBank_2538' , 'DrugBank_2592' , 'DrugBank_2651' ,
399- 'DrugBank_2987' , 'DrugBank_3332' , 'DrugBank_3502' , 'DrugBank_3622' ,
400- 'DrugBank_3726' , 'DrugBank_3844' , 'DrugBank_3930' , 'DrugBank_4161' ,
401- 'DrugBank_4162' , 'DrugBank_4778' , 'DrugBank_4593' , 'DrugBank_4959' ,
402- 'DrugBank_5043' , 'DrugBank_5076' , 'DrugBank_5176' , 'DrugBank_5418' ,
403- 'DrugBank_5737' , 'DrugBank_5902' , 'DrugBank_6304' , 'DrugBank_6305' ,
404- 'DrugBank_6329' , 'DrugBank_6355' , 'DrugBank_6401' , 'DrugBank_6509' ,
405- 'DrugBank_6531' , 'DrugBank_6647' ,
406-
407- # These test cases are allowed to fail.
408- 'DrugBank_390' , 'DrugBank_810' , 'DrugBank_4316' , 'DrugBank_4346' ,
409- 'DrugBank_7124'
410- }
411- undefined_stereo = molecule .name in undefined_stereo_mols
434+ # (This list is larger than the usual sets of undefined-stereo molecules, which probably
435+ # has something to do with the information content of IUPAC names.)
436+ iupac_problem_mols = {'DrugBank_977' , 'DrugBank_1634' , 'DrugBank_1700' , 'DrugBank_1962' ,
437+ 'DrugBank_2148' , 'DrugBank_2178' , 'DrugBank_2186' , 'DrugBank_2208' ,
438+ 'DrugBank_2519' , 'DrugBank_2538' , 'DrugBank_2592' , 'DrugBank_2651' ,
439+ 'DrugBank_2987' , 'DrugBank_3332' , 'DrugBank_3502' , 'DrugBank_3622' ,
440+ 'DrugBank_3726' , 'DrugBank_3844' , 'DrugBank_3930' , 'DrugBank_4161' ,
441+ 'DrugBank_4162' , 'DrugBank_4778' , 'DrugBank_4593' , 'DrugBank_4959' ,
442+ 'DrugBank_5043' , 'DrugBank_5076' , 'DrugBank_5176' , 'DrugBank_5418' ,
443+ 'DrugBank_5737' , 'DrugBank_5902' , 'DrugBank_6304' , 'DrugBank_6305' ,
444+ 'DrugBank_6329' , 'DrugBank_6355' , 'DrugBank_6401' , 'DrugBank_6509' ,
445+ 'DrugBank_6531' , 'DrugBank_6647' ,
446+
447+ # These test cases are allowed to fail.
448+ 'DrugBank_390' , 'DrugBank_810' , 'DrugBank_4316' , 'DrugBank_4346' ,
449+ 'DrugBank_7124'
450+ }
451+ undefined_stereo = molecule .name in iupac_problem_mols
412452
413453 iupac = molecule .to_iupac ()
414454
@@ -431,7 +471,7 @@ def test_to_from_topology(self, molecule):
431471 @pytest .mark .parametrize ('format' , [
432472 'mol2' ,
433473 'sdf' ,
434- pytest .param ('pdb' , marks = pytest .mark .wip (reason = 'Read from pdb has not bee implemented properly yet' ))
474+ pytest .param ('pdb' , marks = pytest .mark .wip (reason = 'Read from pdb has not been implemented properly yet' ))
435475 ])
436476 def test_to_from_file (self , molecule , format ):
437477 """Test that conversion/creation of a molecule to and from a file is consistent."""
@@ -471,12 +511,7 @@ def test_to_from_oemol(self, molecule):
471511 # known_failures = {'ZINC05964684', 'ZINC05885163', 'ZINC05543156', 'ZINC17211981',
472512 # 'ZINC17312986', 'ZINC06424847', 'ZINC04963126'}
473513
474- # DrugBank test set known failures.
475- undefined_stereo_mols = {'DrugBank_1634' , 'DrugBank_1700' , 'DrugBank_1962' ,
476- 'DrugBank_2519' , 'DrugBank_2987' , 'DrugBank_3502' ,
477- 'DrugBank_3930' , 'DrugBank_4161' , 'DrugBank_4162' ,
478- 'DrugBank_5043' , 'DrugBank_5418' , 'DrugBank_6531' }
479- undefined_stereo = molecule .name in undefined_stereo_mols
514+ undefined_stereo = molecule .name in openeye_drugbank_undefined_stereo_mols
480515
481516 toolkit_wrapper = OpenEyeToolkitWrapper ()
482517
0 commit comments