Skip to content

Commit

Permalink
Merge pull request #226 from datamol-io/fix/fp
Browse files: browse the repository at this point in the history
refactor to rdFingerprintGenerator
  • Loading branch information
zhu0619 authored Jun 10, 2024
2 parents cc3cc36 + 9210ec9 commit 3864d9d
Show file tree
Hide file tree
Showing 9 changed files with 575 additions and 115 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
matrix:
python-version: ["3.10", "3.11"]
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
rdkit-version: ["2023.03", "2023.09"]
rdkit-version: ["2023.09", "2024.03"]

runs-on: ${{ matrix.os }}
timeout-minutes: 30
Expand Down
4 changes: 3 additions & 1 deletion datamol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"parallelized_with_batches": "datamol.utils",
"JobRunner": "datamol.utils",
"fs": "datamol.utils",
# log
# data
"freesolv": "datamol.data",
"cdk2": "datamol.data",
"solubility": "datamol.data",
Expand All @@ -39,6 +39,7 @@
"enable_rdkit_log": "datamol.log",
"disable_rdkit_log": "datamol.log",
"without_rdkit_log": "datamol.log",
"no_rdkit_log": "datamol.log",
# mol
"PERIODIC_TABLE": "datamol.mol",
"TRIPLE_BOND": "datamol.mol",
Expand Down Expand Up @@ -233,6 +234,7 @@ def __dir__():
from .log import enable_rdkit_log
from .log import disable_rdkit_log
from .log import without_rdkit_log
from .log import no_rdkit_log

from .mol import PERIODIC_TABLE
from .mol import TRIPLE_BOND
Expand Down
2 changes: 2 additions & 0 deletions datamol/descriptors/descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@

from .. import Mol
from ..convert import from_smarts
from ..log import no_rdkit_log
from .._version import is_lower_than_current_rdkit_version


@no_rdkit_log
def _sasscorer(mol: Mol):
sys.path.append(os.path.join(RDConfig.RDContribDir, "SA_Score"))
try:
Expand Down
178 changes: 95 additions & 83 deletions datamol/fp.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from rdkit.Chem import rdReducedGraphs
from rdkit.Chem.EState import Fingerprinter as EStateFingerprinter
from rdkit.Avalon import pyAvalonTools
from rdkit.Chem import rdFingerprintGenerator

from rdkit.DataStructs.cDataStructs import SparseBitVect
from rdkit.DataStructs.cDataStructs import UIntSparseIntVect
Expand All @@ -20,25 +21,28 @@
from rdkit.DataStructs.cDataStructs import ExplicitBitVect
from rdkit.DataStructs.cDataStructs import ULongSparseIntVect

# Fingerprint types that are computed through an `rdFingerprintGenerator`
# factory. Every base type is registered twice with the same factory: once
# under its plain name and once under "<name>-count".
# Resulting keys, in insertion order:
#   ecfp, fcfp, topological, atompair, rdkit,
#   ecfp-count, fcfp-count, topological-count, atompair-count, rdkit-count
_FP_GENERATORS = {
    f"{fp_name}{suffix}": generator_factory
    for suffix in ("", "-count")
    for fp_name, generator_factory in (
        ("ecfp", rdFingerprintGenerator.GetMorganGenerator),
        ("fcfp", rdFingerprintGenerator.GetMorganGenerator),
        ("topological", rdFingerprintGenerator.GetTopologicalTorsionGenerator),
        ("atompair", rdFingerprintGenerator.GetAtomPairGenerator),
        ("rdkit", rdFingerprintGenerator.GetRDKitFPGenerator),
    )
}

_FP_FUNCS = {
"maccs": rdMolDescriptors.GetMACCSKeysFingerprint,
"ecfp": rdMolDescriptors.GetMorganFingerprintAsBitVect,
"fcfp": rdMolDescriptors.GetMorganFingerprintAsBitVect,
"topological": rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect,
"atompair": rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect,
"rdkit": rdmolops.RDKFingerprint,
"pattern": rdmolops.PatternFingerprint,
"layered": rdmolops.LayeredFingerprint,
"erg": rdReducedGraphs.GetErGFingerprint,
# NOTE(hadim): bad for pickling?
"estate": lambda x, **args: EStateFingerprinter.FingerprintMol(x)[0],
"avalon-count": pyAvalonTools.GetAvalonCountFP,
"rdkit-count": rdmolops.UnfoldedRDKFingerprintCountBased,
"ecfp-count": rdMolDescriptors.GetHashedMorganFingerprint,
"fcfp-count": rdMolDescriptors.GetHashedMorganFingerprint,
"topological-count": rdMolDescriptors.GetHashedTopologicalTorsionFingerprint,
"atompair-count": rdMolDescriptors.GetHashedAtomPairFingerprint,
**_FP_GENERATORS,
}

_FP_DEFAULT_ARGS = {
Expand All @@ -51,59 +55,60 @@
},
"ecfp": {
"radius": 3, # ECFP6 - not the RDKit default (ECFP4)
"nBits": 2048,
"invariants": [],
"fromAtoms": [],
"useChirality": False,
"fpSize": 2048,
"includeChirality": False,
"useBondTypes": True,
"useFeatures": False,
"countSimulation": False,
"countBounds": None,
"atomInvariantsGenerator": None,
"bondInvariantsGenerator": None,
},
"fcfp": {
"radius": 2, # FCFP4
"nBits": 2048,
"invariants": [], # you may want to provide features invariance
"fromAtoms": [],
"useChirality": False,
"radius": 2,
"fpSize": 2048,
"includeChirality": False,
"useBondTypes": True,
"useFeatures": True,
"countSimulation": False,
"countBounds": None,
"atomInvariantsGenerator": rdFingerprintGenerator.GetMorganFeatureAtomInvGen(),
"bondInvariantsGenerator": None,
},
"topological": {
"nBits": 2048,
"targetSize": 4,
"fromAtoms": 0,
"ignoreAtoms": 0,
"atomInvariants": 0,
"nBitsPerEntry": 4,
"includeChirality": False,
"torsionAtomCount": 4,
"countSimulation": True,
"countBounds": None,
"fpSize": 2048,
"atomInvariantsGenerator": None,
},
"atompair": {
"nBits": 2048,
"minLength": 1,
"maxLength": 30,
"fromAtoms": 0,
"ignoreAtoms": 0,
"atomInvariants": 0,
"nBitsPerEntry": 4,
"minDistance": 1,
"maxDistance": 30,
"includeChirality": False,
"use2D": True,
"confId": -1,
"countSimulation": True,
"countBounds": None,
"fpSize": 2048,
"atomInvariantsGenerator": None,
},
"rdkit": {
"minPath": 1,
"maxPath": 7,
"fpSize": 2048,
"nBitsPerHash": 2,
"useHs": True,
"tgtDensity": 0.0,
"minSize": 128,
"branchedPaths": True,
"useBondOrder": True,
"atomInvariants": 0,
"fromAtoms": 0,
"atomBits": None,
"bitInfo": None,
"countSimulation": False,
"countBounds": None,
"fpSize": 2048,
"numBitsPerFeature": 2,
"atomInvariantsGenerator": None,
},
"pattern": {
"fpSize": 2048,
"atomCounts": [],
"setOnlyBits": None,
"tautomerFingerprints": False,
},
"pattern": {"fpSize": 2048, "atomCounts": [], "setOnlyBits": None},
"layered": {
"fpSize": 2048,
"minPath": 1,
Expand All @@ -126,60 +131,60 @@
"erg": {"atomTypes": 0, "fuzzIncrement": 0.3, "minPath": 1, "maxPath": 15},
"estate": {},
# COUNTING FP
"avalon-count": {
"nBits": 512,
"isQuery": False,
"bitFlags": pyAvalonTools.avalonSimilarityBits,
},
"ecfp-count": {
"radius": 2, # ECFP4
"nBits": 2048,
"invariants": [],
"fromAtoms": [],
"useChirality": False,
"radius": 3, # ECFP6 - not the RDKit default (ECFP4)
"fpSize": 2048,
"includeChirality": False,
"useBondTypes": True,
"useFeatures": False,
"includeRedundantEnvironments": False,
"countSimulation": False,
"countBounds": None,
"atomInvariantsGenerator": None,
"bondInvariantsGenerator": None,
},
"fcfp-count": {
"radius": 2, # FCFP4
"nBits": 2048,
"invariants": [], # you may want to provide features invariance
"fromAtoms": [],
"useChirality": False,
"radius": 2,
"fpSize": 2048,
"includeChirality": False,
"useBondTypes": True,
"useFeatures": True,
"includeRedundantEnvironments": False,
"countSimulation": False,
"countBounds": None,
"atomInvariantsGenerator": rdFingerprintGenerator.GetMorganFeatureAtomInvGen(),
"bondInvariantsGenerator": None,
},
"topological-count": {
"nBits": 2048,
"targetSize": 4,
"fromAtoms": 0,
"ignoreAtoms": 0,
"atomInvariants": 0,
"includeChirality": False,
"torsionAtomCount": 4,
"countSimulation": True,
"countBounds": None,
"fpSize": 2048,
"atomInvariantsGenerator": None,
},
"avalon-count": {
"nBits": 512,
"isQuery": False,
"bitFlags": pyAvalonTools.avalonSimilarityBits,
"atompair-count": {
"minDistance": 1,
"maxDistance": 30,
"includeChirality": False,
"use2D": True,
"countSimulation": True,
"countBounds": None,
"fpSize": 2048,
"atomInvariantsGenerator": None,
},
"rdkit-count": {
"minPath": 1,
"maxPath": 7,
"useHs": True,
"branchedPaths": True,
"useBondOrder": True,
"atomInvariants": 0,
"fromAtoms": 0,
"atomBits": None,
"bitInfo": None,
},
"atompair-count": {
"nBits": 2048,
"minLength": 1,
"maxLength": 30,
"fromAtoms": 0,
"ignoreAtoms": 0,
"atomInvariants": 0,
"includeChirality": False,
"use2D": True,
"confId": -1,
"countSimulation": False,
"countBounds": None,
"fpSize": 2048,
"numBitsPerFeature": 1,
"atomInvariantsGenerator": None,
},
}

Expand Down Expand Up @@ -279,7 +284,14 @@ def to_fp(
fp_args.setdefault(key, value)

# Compute the fingerprint
fp = fp_func(mol, **fp_args)
if fp_type in _FP_GENERATORS:
fp_func = fp_func(**fp_args)
if fp_type.endswith("-count"):
fp = fp_func.GetCountFingerprint(mol)
else:
fp = fp_func.GetFingerprint(mol)
else:
fp = fp_func(mol, **fp_args)

# Fold the fp if needed.
if fold_size is not None:
Expand Down
51 changes: 51 additions & 0 deletions datamol/log.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from rdkit import RDLogger
from rdkit import rdBase
from functools import wraps


class without_rdkit_log:
Expand Down Expand Up @@ -71,3 +72,53 @@ def enable_rdkit_log():
"""Enable all rdkit logs."""
for log_level in RDLogger._levels:
rdBase.EnableLog(log_level)


def no_rdkit_log(
    func=None,
    *,
    mute_errors: bool = True,
    mute_warning: bool = True,
    mute_info: bool = True,
    mute_debug: bool = True,
    enable: bool = True,
):
    """Decorator that runs a function inside a `without_rdkit_log` context.

    The decorator works both bare (`@no_rdkit_log`) and with keyword options
    (`@no_rdkit_log(mute_info=False)`). With the default options, every log
    level (error, warning, info and debug) is muted while the decorated
    function executes.

    Args:
        func: The function to decorate. Leave it to `None` (i.e. call the
            decorator with parentheses) to get back a decorator configured
            with the given options.
        mute_errors: Whether to mute error logs (default is True).
        mute_warning: Whether to mute warning logs (default is True).
        mute_info: Whether to mute info logs (default is True).
        mute_debug: Whether to mute debug logs (default is True).
        enable: Whether to enable the log muting (default is True). If set
            to False, no logs will be muted.

    Returns:
        The wrapped function when `func` is given, otherwise a decorator
        expecting the function to wrap.

    Example:

    ```python
    @no_rdkit_log()
    def example_function():
        # Your function code here
        pass

    example_function()  # RDKit logs won't show during this function's execution
    ```
    """

    if func is not None:

        @wraps(func)
        def _silenced(*args, **kwargs):
            # The muting options are forwarded positionally, in the order
            # errors, warning, info, debug, enable.
            with without_rdkit_log(mute_errors, mute_warning, mute_info, mute_debug, enable):
                return func(*args, **kwargs)

        return _silenced

    # Called as `@no_rdkit_log(...)`: hand back a decorator that remembers
    # the options and re-enters this function with the target function.
    def _configured(target):
        return no_rdkit_log(
            target,
            mute_errors=mute_errors,
            mute_warning=mute_warning,
            mute_info=mute_info,
            mute_debug=mute_debug,
            enable=enable,
        )

    return _configured
432 changes: 405 additions & 27 deletions docs/tutorials/Descriptors.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ dependencies:
- scikit-learn

# Chemistry
- rdkit >=2021.03
- rdkit
- selfies

# Optional deps
Expand All @@ -46,7 +46,7 @@ dependencies:
- nbconvert

# Doc
- mkdocs
- mkdocs <1.6
- mkdocs-material >=7.1.1
- mkdocs-material-extensions
- mkdocstrings
Expand Down
2 changes: 1 addition & 1 deletion tests/test_fp.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_all_fps():
"erg": {"size": 315, "bits_sum": 23.4},
"estate": {"size": 79, "bits_sum": 13},
"avalon-count": {"size": 512, "bits_sum": 168},
"ecfp-count": {"size": 2048, "bits_sum": 35},
"ecfp-count": {"size": 2048, "bits_sum": 42},
"fcfp-count": {"size": 2048, "bits_sum": 35},
"topological-count": {"size": 2048, "bits_sum": 19},
"atompair-count": {"size": 2048, "bits_sum": 78},
Expand Down
Loading

0 comments on commit 3864d9d

Please sign in to comment.