Skip to content

Commit

Permalink
Merge branch 'master' into tensor_utils
Browse files Browse the repository at this point in the history
  • Loading branch information
a-r-j authored Jan 29, 2023
2 parents c9dde87 + 4634887 commit ed463a4
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 24 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ repos:
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/psf/black
rev: 22.10.0
rev: 22.12.0
hooks:
- id: black
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v1.2.3
hooks:
- id: flake8
- repo: https://github.com/pycqa/isort
rev: 5.10.1
rev: 5.11.4
hooks:
- id: isort
2 changes: 1 addition & 1 deletion .requirements/base.in
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ loguru
matplotlib>=3.4.3
multipledispatch
networkx
numpy
numpy<1.24.0
pandas
plotly
pydantic
Expand Down
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
* [Tensor Operations] - [#244](https://github.com/a-r-j/graphein/pull/244) Adds suite of utilities for working with ESMfold (graphein.protein.folding_utils).



#### Improvements

* [Logging] - [#221](https://github.com/a-r-j/graphein/pull/221) Adds global control of logging with `graphein.verbose(enabled=False)`.
* [Logging] - [#242](https://github.com/a-r-j/graphein/pull/242) Adds control of protein graph construction logging. Resolves [#238](https://github.com/a-r-j/graphein/issues/238)

#### Protein

* [Bugfix] - [#254](https://github.com/a-r-j/graphein/pull/254) Fix peptide bond addition for all atom graphs.
* [Bugfix] - [#223](https://github.com/a-r-j/graphein/pull/223) Fix handling of insertions in protein graphs. Insertions are now given IDs like: `A:SER:12:A`. Contribution by @manonreau.
* [Bugfix] - [#226](https://github.com/a-r-j/graphein/pull/226) Catches failed AF2 structure downloads [#225](https://github.com/a-r-j/graphein/issues/225)
* [Feature] - [#229](https://github.com/a-r-j/graphein/pull/229) Adds support for filtering KNN edges based on self-loops and chain membership. Contribution by @anton-bushuiev.
Expand Down
50 changes: 30 additions & 20 deletions graphein/protein/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from __future__ import annotations

import traceback
from contextlib import nullcontext
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

Expand Down Expand Up @@ -628,6 +629,7 @@ def construct_graph(
edge_annotation_funcs: Optional[List[Callable]] = None,
node_annotation_funcs: Optional[List[Callable]] = None,
graph_annotation_funcs: Optional[List[Callable]] = None,
verbose: bool = True,
) -> nx.Graph:
"""
Constructs protein structure graph from a ``pdb_code`` or ``pdb_path``.
Expand Down Expand Up @@ -674,6 +676,9 @@ def construct_graph(
:param graph_annotation_funcs: List of graph annotation function.
Default is ``None``.
:type graph_annotation_funcs: List[Callable]
:param verbose: Whether to display progress bars while the graph is
constructed. Default is ``True``.
:type verbose: bool
:return: Protein Structure Graph
:rtype: nx.Graph
"""
Expand All @@ -687,13 +692,17 @@ def construct_graph(
# If no config is provided, use default
if config is None:
config = ProteinGraphConfig()
with Progress(transient=True) as progress:
task1 = progress.add_task("Reading PDB file...", total=1)
# Get name from pdb_file is no pdb_code is provided
# if pdb_path and (pdb_code is None and uniprot_id is None):
# pdb_code = get_protein_name_from_filename(pdb_path)
# pdb_code = pdb_code if len(pdb_code) == 4 else None
progress.advance(task1)

# Use progress tracking context if in verbose mode
context = Progress(transient=True) if verbose else nullcontext
with context as progress:
if verbose:
task1 = progress.add_task("Reading PDB file...", total=1)
# Get name from pdb_file if no pdb_code is provided
# if pdb_path and (pdb_code is None and uniprot_id is None):
# pdb_code = get_protein_name_from_filename(pdb_path)
# pdb_code = pdb_code if len(pdb_code) == 4 else None
progress.advance(task1)

# If config params are provided, overwrite them
config.protein_df_processing_functions = (
Expand Down Expand Up @@ -728,14 +737,9 @@ def construct_graph(
uniprot_id,
model_index=model_index,
)
task2 = progress.add_task("Processing PDB dataframe...", total=1)
# raw_df = label_node_id(raw_df, granularity=config.granularity)
# raw_df.df["ATOM"] = label_node_id(
# raw_df.df["ATOM"], granularity=config.granularity
# )
# raw_df.df["HETATM"] = label_node_id(
# raw_df.df["HETATM"], granularity=config.granularity
# )

if verbose:
task2 = progress.add_task("Processing PDB dataframe...", total=1)
raw_df = sort_dataframe(raw_df)
protein_df = process_dataframe(
raw_df,
Expand All @@ -744,9 +748,11 @@ def construct_graph(
insertions=config.insertions,
keep_hets=config.keep_hets,
)
progress.advance(task2)

task3 = progress.add_task("Initializing graph...", total=1)
if verbose:
progress.advance(task2)

task3 = progress.add_task("Initializing graph...", total=1)
# Initialise graph with metadata
g = initialise_graph_with_metadata(
protein_df=protein_df,
Expand All @@ -765,15 +771,19 @@ def construct_graph(
# Annotate additional node metadata
if config.node_metadata_functions is not None:
g = annotate_node_metadata(g, config.node_metadata_functions)
progress.advance(task3)
task4 = progress.add_task("Constructing edges...", total=1)

if verbose:
progress.advance(task3)
task4 = progress.add_task("Constructing edges...", total=1)
# Compute graph edges
g = compute_edges(
g,
funcs=config.edge_construction_functions,
get_contacts_config=None,
)
progress.advance(task4)

if verbose:
progress.advance(task4)

# Annotate additional graph metadata
if config.graph_metadata_functions is not None:
Expand Down

0 comments on commit ed463a4

Please sign in to comment.