Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor to rdFingerprintGenerator #226

Merged
merged 6 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
matrix:
python-version: ["3.10", "3.11"]
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
rdkit-version: ["2023.03", "2023.09"]
rdkit-version: ["2023.09", "2024.03"]

runs-on: ${{ matrix.os }}
timeout-minutes: 30
Expand Down
4 changes: 3 additions & 1 deletion datamol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"parallelized_with_batches": "datamol.utils",
"JobRunner": "datamol.utils",
"fs": "datamol.utils",
# log
# data
"freesolv": "datamol.data",
"cdk2": "datamol.data",
"solubility": "datamol.data",
Expand All @@ -39,6 +39,7 @@
"enable_rdkit_log": "datamol.log",
"disable_rdkit_log": "datamol.log",
"without_rdkit_log": "datamol.log",
"no_rdkit_log": "datamol.log",
# mol
"PERIODIC_TABLE": "datamol.mol",
"TRIPLE_BOND": "datamol.mol",
Expand Down Expand Up @@ -233,6 +234,7 @@ def __dir__():
from .log import enable_rdkit_log
from .log import disable_rdkit_log
from .log import without_rdkit_log
from .log import no_rdkit_log

from .mol import PERIODIC_TABLE
from .mol import TRIPLE_BOND
Expand Down
2 changes: 2 additions & 0 deletions datamol/descriptors/descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@

from .. import Mol
from ..convert import from_smarts
from ..log import no_rdkit_log
from .._version import is_lower_than_current_rdkit_version


@no_rdkit_log
def _sasscorer(mol: Mol):
sys.path.append(os.path.join(RDConfig.RDContribDir, "SA_Score"))
try:
Expand Down
178 changes: 95 additions & 83 deletions datamol/fp.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from rdkit.Chem import rdReducedGraphs
from rdkit.Chem.EState import Fingerprinter as EStateFingerprinter
from rdkit.Avalon import pyAvalonTools
from rdkit.Chem import rdFingerprintGenerator

from rdkit.DataStructs.cDataStructs import SparseBitVect
from rdkit.DataStructs.cDataStructs import UIntSparseIntVect
Expand All @@ -20,25 +21,28 @@
from rdkit.DataStructs.cDataStructs import ExplicitBitVect
from rdkit.DataStructs.cDataStructs import ULongSparseIntVect

_FP_GENERATORS = {
"ecfp": rdFingerprintGenerator.GetMorganGenerator,
"fcfp": rdFingerprintGenerator.GetMorganGenerator,
"topological": rdFingerprintGenerator.GetTopologicalTorsionGenerator,
"atompair": rdFingerprintGenerator.GetAtomPairGenerator,
"rdkit": rdFingerprintGenerator.GetRDKitFPGenerator,
"ecfp-count": rdFingerprintGenerator.GetMorganGenerator,
"fcfp-count": rdFingerprintGenerator.GetMorganGenerator,
maclandrol marked this conversation as resolved.
Show resolved Hide resolved
"topological-count": rdFingerprintGenerator.GetTopologicalTorsionGenerator,
"atompair-count": rdFingerprintGenerator.GetAtomPairGenerator,
"rdkit-count": rdFingerprintGenerator.GetRDKitFPGenerator,
}

_FP_FUNCS = {
"maccs": rdMolDescriptors.GetMACCSKeysFingerprint,
"ecfp": rdMolDescriptors.GetMorganFingerprintAsBitVect,
"fcfp": rdMolDescriptors.GetMorganFingerprintAsBitVect,
"topological": rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect,
"atompair": rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect,
"rdkit": rdmolops.RDKFingerprint,
"pattern": rdmolops.PatternFingerprint,
"layered": rdmolops.LayeredFingerprint,
"erg": rdReducedGraphs.GetErGFingerprint,
# NOTE(hadim): bad for pickling?
"estate": lambda x, **args: EStateFingerprinter.FingerprintMol(x)[0],
"avalon-count": pyAvalonTools.GetAvalonCountFP,
"rdkit-count": rdmolops.UnfoldedRDKFingerprintCountBased,
"ecfp-count": rdMolDescriptors.GetHashedMorganFingerprint,
"fcfp-count": rdMolDescriptors.GetHashedMorganFingerprint,
"topological-count": rdMolDescriptors.GetHashedTopologicalTorsionFingerprint,
"atompair-count": rdMolDescriptors.GetHashedAtomPairFingerprint,
**_FP_GENERATORS,
}

_FP_DEFAULT_ARGS = {
Expand All @@ -51,59 +55,60 @@
},
"ecfp": {
"radius": 3, # ECFP6 - not the RDKit default (ECFP4)
"nBits": 2048,
"invariants": [],
"fromAtoms": [],
"useChirality": False,
"fpSize": 2048,
"includeChirality": False,
"useBondTypes": True,
"useFeatures": False,
"countSimulation": False,
"countBounds": None,
"atomInvariantsGenerator": None,
"bondInvariantsGenerator": None,
},
"fcfp": {
"radius": 2, # FCFP4
"nBits": 2048,
"invariants": [], # you may want to provide features invariance
"fromAtoms": [],
"useChirality": False,
"radius": 2,
"fpSize": 2048,
"includeChirality": False,
"useBondTypes": True,
"useFeatures": True,
"countSimulation": False,
"countBounds": None,
"atomInvariantsGenerator": rdFingerprintGenerator.GetMorganFeatureAtomInvGen(),
"bondInvariantsGenerator": None,
},
"topological": {
"nBits": 2048,
"targetSize": 4,
"fromAtoms": 0,
"ignoreAtoms": 0,
"atomInvariants": 0,
"nBitsPerEntry": 4,
"includeChirality": False,
"torsionAtomCount": 4,
"countSimulation": True,
"countBounds": None,
"fpSize": 2048,
"atomInvariantsGenerator": None,
},
"atompair": {
"nBits": 2048,
"minLength": 1,
"maxLength": 30,
"fromAtoms": 0,
"ignoreAtoms": 0,
"atomInvariants": 0,
"nBitsPerEntry": 4,
"minDistance": 1,
"maxDistance": 30,
"includeChirality": False,
"use2D": True,
"confId": -1,
"countSimulation": True,
"countBounds": None,
"fpSize": 2048,
"atomInvariantsGenerator": None,
},
"rdkit": {
"minPath": 1,
"maxPath": 7,
"fpSize": 2048,
"nBitsPerHash": 2,
"useHs": True,
"tgtDensity": 0.0,
"minSize": 128,
"branchedPaths": True,
"useBondOrder": True,
"atomInvariants": 0,
"fromAtoms": 0,
"atomBits": None,
"bitInfo": None,
"countSimulation": False,
"countBounds": None,
"fpSize": 2048,
"numBitsPerFeature": 2,
"atomInvariantsGenerator": None,
},
"pattern": {
"fpSize": 2048,
"atomCounts": [],
"setOnlyBits": None,
"tautomerFingerprints": False,
},
"pattern": {"fpSize": 2048, "atomCounts": [], "setOnlyBits": None},
"layered": {
"fpSize": 2048,
"minPath": 1,
Expand All @@ -126,60 +131,60 @@
"erg": {"atomTypes": 0, "fuzzIncrement": 0.3, "minPath": 1, "maxPath": 15},
"estate": {},
# COUNTING FP
"avalon-count": {
"nBits": 512,
"isQuery": False,
"bitFlags": pyAvalonTools.avalonSimilarityBits,
},
"ecfp-count": {
maclandrol marked this conversation as resolved.
Show resolved Hide resolved
"radius": 2, # ECFP4
"nBits": 2048,
"invariants": [],
"fromAtoms": [],
"useChirality": False,
"radius": 3, # ECFP6 - not the RDKit default (ECFP4)
"fpSize": 2048,
"includeChirality": False,
"useBondTypes": True,
"useFeatures": False,
"includeRedundantEnvironments": False,
"countSimulation": False,
"countBounds": None,
"atomInvariantsGenerator": None,
"bondInvariantsGenerator": None,
},
"fcfp-count": {
"radius": 2, # FCFP4
"nBits": 2048,
"invariants": [], # you may want to provide features invariance
"fromAtoms": [],
"useChirality": False,
"radius": 2,
"fpSize": 2048,
"includeChirality": False,
"useBondTypes": True,
"useFeatures": True,
"includeRedundantEnvironments": False,
"countSimulation": False,
"countBounds": None,
"atomInvariantsGenerator": rdFingerprintGenerator.GetMorganFeatureAtomInvGen(),
"bondInvariantsGenerator": None,
},
"topological-count": {
"nBits": 2048,
"targetSize": 4,
"fromAtoms": 0,
"ignoreAtoms": 0,
"atomInvariants": 0,
"includeChirality": False,
"torsionAtomCount": 4,
"countSimulation": True,
"countBounds": None,
"fpSize": 2048,
"atomInvariantsGenerator": None,
},
"avalon-count": {
"nBits": 512,
"isQuery": False,
"bitFlags": pyAvalonTools.avalonSimilarityBits,
"atompair-count": {
"minDistance": 1,
"maxDistance": 30,
"includeChirality": False,
"use2D": True,
"countSimulation": True,
"countBounds": None,
"fpSize": 2048,
"atomInvariantsGenerator": None,
},
"rdkit-count": {
"minPath": 1,
"maxPath": 7,
"useHs": True,
"branchedPaths": True,
"useBondOrder": True,
"atomInvariants": 0,
"fromAtoms": 0,
"atomBits": None,
"bitInfo": None,
},
"atompair-count": {
"nBits": 2048,
"minLength": 1,
"maxLength": 30,
"fromAtoms": 0,
"ignoreAtoms": 0,
"atomInvariants": 0,
"includeChirality": False,
"use2D": True,
"confId": -1,
"countSimulation": False,
"countBounds": None,
"fpSize": 2048,
"numBitsPerFeature": 1,
"atomInvariantsGenerator": None,
},
}

Expand Down Expand Up @@ -279,7 +284,14 @@ def to_fp(
fp_args.setdefault(key, value)

# Compute the fingerprint
fp = fp_func(mol, **fp_args)
if fp_type in _FP_GENERATORS:
fp_func = fp_func(**fp_args)
if fp_type.endswith("-count"):
fp = fp_func.GetCountFingerprint(mol)
else:
fp = fp_func.GetFingerprint(mol)
else:
fp = fp_func(mol, **fp_args)

# Fold the fp if needed.
if fold_size is not None:
Expand Down
51 changes: 51 additions & 0 deletions datamol/log.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from rdkit import RDLogger
from rdkit import rdBase
from functools import wraps


class without_rdkit_log:
Expand Down Expand Up @@ -71,3 +72,53 @@ def enable_rdkit_log():
"""Enable all rdkit logs."""
for log_level in RDLogger._levels:
rdBase.EnableLog(log_level)


def no_rdkit_log(
    func=None,
    *,
    mute_errors: bool = True,
    mute_warning: bool = True,
    mute_info: bool = True,
    mute_debug: bool = True,
    enable: bool = True,
):
    """Decorator that runs a function inside a `without_rdkit_log` context.

    The decorator can be applied bare (`@no_rdkit_log`) or with keyword
    options (`@no_rdkit_log(mute_info=False)`). With the default options every
    log level (error, warning, info and debug) is muted.

    Args:
        func: The function to decorate. It is `None` when the decorator is
            written with parentheses; a decorator waiting for the function is
            returned in that case.
        mute_errors: Whether to mute error logs (default is True).
        mute_warning: Whether to mute warning logs (default is True).
        mute_info: Whether to mute info logs (default is True).
        mute_debug: Whether to mute debug logs (default is True).
        enable: Whether to enable the log muting (default is True). If set to
            False, no logs will be muted.

    Returns:
        The wrapped function when `func` is given, otherwise a one-argument
        decorator carrying the chosen options.

    Example:
        ```python
        @no_rdkit_log()
        def example_function():
            # Your function code here
            pass

        example_function()  # RDKit logs won't show during this function's execution
        ```
    """

    if func is None:
        # Parenthesised usage: remember the options and wait for the function.
        options = dict(
            mute_errors=mute_errors,
            mute_warning=mute_warning,
            mute_info=mute_info,
            mute_debug=mute_debug,
            enable=enable,
        )

        def configured(target):
            return no_rdkit_log(target, **options)

        return configured

    # Positional order expected by `without_rdkit_log`.
    mute_flags = (mute_errors, mute_warning, mute_info, mute_debug, enable)

    @wraps(func)
    def muted_call(*call_args, **call_kwargs):
        # A new context manager is built for each invocation.
        with without_rdkit_log(*mute_flags):
            return func(*call_args, **call_kwargs)

    return muted_call
432 changes: 405 additions & 27 deletions docs/tutorials/Descriptors.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ dependencies:
- scikit-learn

# Chemistry
- rdkit >=2021.03
- rdkit
- selfies

# Optional deps
Expand All @@ -46,7 +46,7 @@ dependencies:
- nbconvert

# Doc
- mkdocs
- mkdocs <1.6
- mkdocs-material >=7.1.1
- mkdocs-material-extensions
- mkdocstrings
Expand Down
2 changes: 1 addition & 1 deletion tests/test_fp.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_all_fps():
"erg": {"size": 315, "bits_sum": 23.4},
"estate": {"size": 79, "bits_sum": 13},
"avalon-count": {"size": 512, "bits_sum": 168},
"ecfp-count": {"size": 2048, "bits_sum": 35},
"ecfp-count": {"size": 2048, "bits_sum": 42},
"fcfp-count": {"size": 2048, "bits_sum": 35},
"topological-count": {"size": 2048, "bits_sum": 19},
"atompair-count": {"size": 2048, "bits_sum": 78},
Expand Down
Loading
Loading