Skip to content

Commit

Permalink
Merge pull request #208 from datamol-io/more_datasets
Browse files Browse the repository at this point in the history
Add two more datasets + ruff linting
  • Loading branch information
hadim authored Jul 27, 2023
2 parents fc0431b + be52130 commit 2619c61
Show file tree
Hide file tree
Showing 27 changed files with 4,063 additions and 62 deletions.
18 changes: 6 additions & 12 deletions .github/workflows/code-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ jobs:
- name: Lint
run: black --check .

python-typing-mypy:
name: Python typing check [mypy]
python-lint-ruff:
name: Python lint [ruff]
runs-on: ubuntu-latest
steps:
- name: Checkout the code
Expand All @@ -41,15 +41,9 @@ jobs:
with:
python-version: "3.10"

- name: Install mypy
- name: Install ruff
run: |
pip install mypy numpy pandas loguru pytest pillow scipy
pip install ruff
- name: Run code check
run: |
mypy . || exitCode=$?
# only fails if exit code >=2
if [ $exitCode -ge 2 ]; then
exit $exitCode
fi
- name: Lint
run: ruff .
4 changes: 4 additions & 0 deletions datamol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
"freesolv": "datamol.data",
"cdk2": "datamol.data",
"solubility": "datamol.data",
"chembl_drugs": "datamol.data",
"chembl_samples": "datamol.data",
# log
"enable_rdkit_log": "datamol.log",
"disable_rdkit_log": "datamol.log",
Expand Down Expand Up @@ -224,6 +226,8 @@ def __dir__():
from .data import freesolv
from .data import cdk2
from .data import solubility
from .data import chembl_drugs
from .data import chembl_samples

from .log import enable_rdkit_log
from .log import disable_rdkit_log
Expand Down
4 changes: 1 addition & 3 deletions datamol/conformers/_features.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
from typing import Optional, Union
from typing import Union
from typing import List
from typing import Optional

import numpy as np

from rdkit import Chem

from ..types import Mol
from ..utils.jobs import JobRunner
from ..utils import decorators
Expand Down
53 changes: 51 additions & 2 deletions datamol/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@
from typing import List
from typing import overload
from typing import Literal
from typing import cast

import sys
import io

try:
import importlib.resources as importlib_resources
except:
except ImportError:
import importlib_resources

import pandas as pd
Expand Down Expand Up @@ -170,3 +169,53 @@ def solubility(as_df: bool = True, mol_column: Optional[str] = "mol"):
return data

return from_df(data, mol_column=mol_column)


@overload
def chembl_drugs(as_df: Literal[True] = True) -> pd.DataFrame:
    ...


@overload
def chembl_drugs(as_df: Literal[False]) -> List[Mol]:
    ...


@overload
def chembl_drugs(as_df: bool) -> Union[List[Mol], pd.DataFrame]:
    ...


def chembl_drugs(as_df: bool = True) -> Union[List[Mol], pd.DataFrame]:
    """Load the bundled ChEMBL drugs dataset (~2k molecules, all drugs).

    Originally proposed by Patrick Walters at
    <https://github.com/PatWalters/practical_cheminformatics_posts/tree/b4dae239a8b942dab3a41e637ac55d4491aee96f/molskill>.

    Args:
        as_df: If True (the default), return the content of
            `chembl_drugs.csv` as a pandas DataFrame. If False, pass that
            DataFrame through `from_df` and return the resulting molecules.

    Returns:
        A DataFrame when `as_df` is True, otherwise a list of molecules.
    """

    with open_datamol_data_file("chembl_drugs.csv") as f:
        data = pd.read_csv(f)

    # Return early instead of rebinding `data` to a different type, so each
    # branch has a single, statically known return type.
    if as_df:
        return data

    return from_df(data)


@overload
def chembl_samples(as_df: Literal[True] = True) -> pd.DataFrame:
    ...


@overload
def chembl_samples(as_df: Literal[False]) -> List[Mol]:
    ...


@overload
def chembl_samples(as_df: bool) -> Union[List[Mol], pd.DataFrame]:
    ...


def chembl_samples(as_df: bool = True) -> Union[List[Mol], pd.DataFrame]:
    """Load the bundled ChEMBL samples dataset (~2k molecules).

    Originally proposed by Patrick Walters at
    <https://github.com/PatWalters/practical_cheminformatics_posts/tree/b4dae239a8b942dab3a41e637ac55d4491aee96f/molskill>.

    Args:
        as_df: If True (the default), return the content of
            `chembl_samples.csv` as a pandas DataFrame. If False, pass that
            DataFrame through `from_df` and return the resulting molecules.

    Returns:
        A DataFrame when `as_df` is True, otherwise a list of molecules.
    """

    with open_datamol_data_file("chembl_samples.csv") as f:
        data = pd.read_csv(f)

    # Return early instead of rebinding `data` to a different type, so each
    # branch has a single, statically known return type.
    if as_df:
        return data

    return from_df(data)
1,936 changes: 1,936 additions & 0 deletions datamol/data/chembl_drugs.csv

Large diffs are not rendered by default.

Loading

0 comments on commit 2619c61

Please sign in to comment.