Created on Wed Nov 14 12:46:18 2018

@author: hcji

Updated on Thu Oct 12 20:04:22 2023

@author: Jnelen
"""
711
812import os
1115import PyFingerprint
1216
# Start the JVM with the bundled CDK jar on the class path, unless a JVM is
# already running in this process (startJVM is only called behind this guard).
if not isJVMStarted():
    cdk_path = os.path.join(PyFingerprint.__path__[0], 'CDK', 'cdk-2.9.jar')
    startJVM(getDefaultJVMPath(), "-ea", "-Djava.class.path=%s" % cdk_path)

# Bind the CDK root package outside the guard so that the module-level name
# `cdk` is defined on every import, including when the JVM was started earlier.
# NOTE(review): if the JVM was started elsewhere without the CDK jar on its
# class path, CDK classes will still be unavailable -- confirm with callers.
cdk = JPackage('org').openscience.cdk
1721
@@ -28,50 +32,88 @@ def cdk_parser_smiles(smi):
2832
def get_fingerprinter(name, size, depth):
    """Create the CDK fingerprinter registered under ``name``.

    Parameters
    ----------
    name : str
        Key of the fingerprinter to build; must be one of the keys of the
        factory table below (e.g. ``"standard"``, ``"maccs"``, ``"circular"``).
    size : int
        Passed to the constructors that accept it (standard, extended, graph,
        hybridization, shortestpath, circular). Ignored by the others.
    depth : int or None
        Passed to the constructors that accept it. When ``None``, the
        fallbacks below are used instead: 7 for the standard / extended /
        graph / hybridization fingerprinters, 4 as the first argument of the
        circular fingerprinter, and the no-argument constructors for the
        lingo and signature fingerprinters.

    Returns
    -------
    A newly constructed CDK fingerprinter object.

    Raises
    ------
    IOError
        If ``name`` is not a known fingerprint type.
    """
    use_defaults = depth is None

    # Fallback used by the fingerprinters that take (size, depth).
    path_depth = 7 if use_defaults else depth
    ## circular fingerprint defaults to ECFP6: https://github.com/cdk/cdk/blob/125505c5ea1f69b692183bb0aae65816e7cb44e7/descriptor/fingerprint/src/main/java/org/openscience/cdk/fingerprint/CircularFingerprinter.java
    circular_class = 4 if use_defaults else depth

    # Factories stay lazy (lambdas) so that only the requested fingerprinter
    # is instantiated, and `cdk` is not touched for an invalid name.
    factories = {
        "standard": lambda: cdk.fingerprint.Fingerprinter(size, path_depth),
        "atompairs": lambda: cdk.fingerprint.AtomPairs2DFingerprinter(),
        "extended": lambda: cdk.fingerprint.ExtendedFingerprinter(size, path_depth),
        "graph": lambda: cdk.fingerprint.GraphOnlyFingerprinter(size, path_depth),
        "maccs": lambda: cdk.fingerprint.MACCSFingerprinter(),
        "pubchem": lambda: cdk.fingerprint.PubchemFingerprinter(
            cdk.silent.SilentChemObjectBuilder.getInstance()
        ),
        "estate": lambda: cdk.fingerprint.EStateFingerprinter(),
        "hybridization": lambda: cdk.fingerprint.HybridizationFingerprinter(size, path_depth),
        # lingo / signature: no-argument constructor when depth is unspecified.
        "lingo": lambda: (
            cdk.fingerprint.LingoFingerprinter()
            if use_defaults
            else cdk.fingerprint.LingoFingerprinter(depth)
        ),
        "klekota-roth": lambda: cdk.fingerprint.KlekotaRothFingerprinter(),
        "shortestpath": lambda: cdk.fingerprint.ShortestPathFingerprinter(size),
        "signature": lambda: (
            cdk.fingerprint.SignatureFingerprinter()
            if use_defaults
            else cdk.fingerprint.SignatureFingerprinter(depth)
        ),
        "circular": lambda: cdk.fingerprint.CircularFingerprinter(circular_class, size),
        "cdk-substructure": lambda: cdk.fingerprint.SubstructureFingerprinter(),
    }

    if name not in factories:
        raise IOError('invalid fingerprint type')

    return factories[name]()
4975
def cdk_fingerprint(smi, fp_type="standard", size=1024, depth=None):
    """Compute a CDK bit fingerprint for a SMILES string.

    Parameters
    ----------
    smi : str
        SMILES string, parsed with ``cdk_parser_smiles``.
    fp_type : str
        Fingerprint type; forwarded to ``get_fingerprinter`` as its name.
    size : int
        Forwarded to ``get_fingerprinter``; also reported as ``nbit`` for
        fingerprint types that have no fixed length in the table below.
    depth : int or None
        Forwarded to ``get_fingerprinter``.

    Returns
    -------
    (bits, nbit)
        ``bits`` is the list of set-bit indices from ``getSetbits()``;
        ``nbit`` is the fixed length for the listed types, ``None`` for
        ``'signature'``, and ``size`` otherwise.
    """
    mol = cdk_parser_smiles(smi)
    ## Sanitize input molecules, as is recommended for most fingerprints (especially shortestpath)
    manipulator = cdk.tools.manipulator.AtomContainerManipulator
    manipulator.percieveAtomTypesAndConfigureAtoms(mol)
    manipulator.convertImplicitToExplicitHydrogens(mol)

    # Fingerprint types whose reported length does not depend on `size`.
    fixed_lengths = {
        'estate': 79,
        'maccs': 166,
        'cdk-substructure': 307,
        'atompairs': 780,
        'pubchem': 881,
        'klekota-roth': 4860,
    }

    if fp_type == 'signature':
        nbit = None
        # NOTE(review): the original re-parsed the SMILES in this branch, so
        # the signature fingerprint is computed on a molecule that did not go
        # through the sanitizing calls above. Kept as-is to leave the returned
        # bits unchanged; confirm whether that is intended.
        mol = cdk_parser_smiles(smi)
    else:
        nbit = fixed_lengths.get(fp_type, size)

    # Reuse a cached fingerprinter for this configuration if one exists.
    cache_key = (fp_type, size, depth)
    if cache_key not in fp_map:
        fp_map[cache_key] = get_fingerprinter(fp_type, size, depth)
    fingerprinter = fp_map[cache_key]

    fp_obj = fingerprinter.getBitFingerprint(mol)
    bits = list(fp_obj.getSetbits())
    return bits, nbit
0 commit comments