Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lig loader minor fix #77

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/plinder/core/index/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@ def __init__(
*,
system_id: str,
prune: bool = True,
skip_3d_confgen: bool = False,
) -> None:
self.system_id: str = system_id
self.prune: bool = prune
self.skip_3d_confgen: bool = skip_3d_confgen
self._entry: dict[str, Any] | None = None
self._system: dict[str, Any] | None = None
self._archive: Path | None = None
Expand Down Expand Up @@ -413,6 +415,7 @@ def holo_structure(self) -> Structure:
protein_sequence=self.sequences,
ligand_sdfs=self.ligand_sdfs,
ligand_smiles=self.smiles,
skip_3d_confgen=self.skip_3d_confgen,
structure_type="holo",
)

Expand Down
83 changes: 45 additions & 38 deletions src/plinder/core/structure/atoms.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,58 +102,64 @@ def atom_array_from_cif_file(


def generate_input_conformer(
template_mol: Chem.Mol, addHs: bool = False, minimize_maxIters: int = -1
template_mol: Chem.Mol,
addHs: bool = False,
minimize_maxIters: int = -1,
skip_3d_confgen: bool = False,
) -> Chem.Mol:
_mol = copy.deepcopy(template_mol)
# need to add Hs to generate sensible conformers
_mol = Chem.AddHs(_mol)
# ps = AllChem.ETKDGv2()
# try embedding molecule using ETKDGv2 (default)
confid = AllChem.EmbedMolecule(
_mol,
# ps,
useRandomCoords=True,
useBasicKnowledge=True,
maxAttempts=100,
randomSeed=42,
)
if confid != -1:
if minimize_maxIters > 0:
# molecule successfully embedded - minimize
success = AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)
# 0 if the optimization converged,
# -1 if the forcefield could not be set up,
# 1 if more iterations are required.
if success == 1:
log.info(
f"generate_conformer: MMFFOptimizeMolecule - more iterations are required, doubling the steps (2x {minimize_maxIters})"
)
# extend optimization to double the steps (extends by the same amount)
AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)
elif success == -1:
log.warning(
"generate_conformer: MMFFOptimizeMolecule - the forcefield could not be set up"
)

if skip_3d_confgen:
confid = -1
else:
# this means EmbedMolecule failed
log.warning(
"generate_conformer: default EmbedMolecule - failed, try using useBasicKnowledge=False"
)
# try less optimal approach
# try embedding molecule using ETKDGv2 (default)
confid = AllChem.EmbedMolecule(
_mol,
# ps,
useRandomCoords=True,
useBasicKnowledge=False,
useBasicKnowledge=True,
maxAttempts=100,
randomSeed=42,
)
if confid == -1:
# if that still fails - try generating just 2D conformer
if confid != -1:
if minimize_maxIters > 0:
# molecule successfully embedded - minimize
success = AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)
# 0 if the optimization converged,
# -1 if the forcefield could not be set up,
# 1 if more iterations are required.
if success == 1:
log.info(
f"generate_conformer: MMFFOptimizeMolecule - more iterations are required, doubling the steps (2x {minimize_maxIters})"
)
# extend optimization to double the steps (extends by the same amount)
AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)
elif success == -1:
log.warning(
"generate_conformer: MMFFOptimizeMolecule - the forcefield could not be set up"
)
else:
# this means EmbedMolecule failed
log.warning(
"generate_conformer: EmbedMolecule - failed, trying rdDepictor.Compute2DCoords instead"
"generate_conformer: default EmbedMolecule - failed, trying using useBasicKnowledge=False"
)
# try less optimal approach
confid = AllChem.EmbedMolecule(
_mol,
useRandomCoords=True,
useBasicKnowledge=False,
maxAttempts=100,
randomSeed=42,
)
confid = rdDepictor.Compute2DCoords(_mol)

if confid == -1:
# fall back to 2D coordinates if 3D conformer generation failed or was skipped
log.warning(
"generate_conformer: using 2D (rdDepictor.Compute2DCoords) instead 3D"
)
confid = rdDepictor.Compute2DCoords(_mol)

# verify that mol has conformers
if _mol.GetNumConformers() == 0:
Expand Down Expand Up @@ -198,6 +204,7 @@ def get_template_to_mol_matches(
rascal_opts.returnEmptyMCES = True
rascal_opts.completeAromaticRings = False
rascal_opts.ringMatchesRingOnly = False
rascal_opts.maxBondMatchPairs = 5000
rascal_opts.timeout = 20

results = rdRascalMCES.FindMCES(mol, template, rascal_opts)
Expand Down
12 changes: 11 additions & 1 deletion src/plinder/core/structure/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ class Structure(BaseModel):
| None
) = None
add_ligand_hydrogens: bool = False
skip_3d_confgen: bool = False
structure_type: str = "holo"

"""Initialize structure.
Expand Down Expand Up @@ -147,6 +148,8 @@ class Structure(BaseModel):
paired stacked arrays (template vs holo) mapping atom order by index
add_ligand_hydrogens : bool = False
Whether to add hydrogens to the ligands
skip_3d_confgen : bool = False
If True, skip 3D conformer generation and use 2D coordinates instead (by default, 3D conformers are generated)
structure_type : str = "holo"
Structure type, "holo", "apo" or "pred"
"""
Expand Down Expand Up @@ -223,7 +226,9 @@ def load_ligands(self) -> None:

# get input_conformer with matches
(template_mol_conformer) = generate_input_conformer(
template_mol, addHs=self.add_ligand_hydrogens
template_mol,
addHs=self.add_ligand_hydrogens,
skip_3d_confgen=self.skip_3d_confgen,
)

self.ligand_mols[name] = (
Expand Down Expand Up @@ -256,6 +261,7 @@ def __add__(self, other: Structure) -> Structure | None:
protein_atom_array=combined_arr,
ligand_mols=self.ligand_mols,
add_ligand_hydrogens=self.add_ligand_hydrogens,
skip_3d_confgen=self.skip_3d_confgen,
structure_type=structure_type,
)
else:
Expand Down Expand Up @@ -303,6 +309,7 @@ def filter(
protein_atom_array=arr,
ligand_mols=self.ligand_mols,
add_ligand_hydrogens=self.add_ligand_hydrogens,
skip_3d_confgen=self.skip_3d_confgen,
structure_type=self.structure_type,
)
assert self.protein_atom_array is not None
Expand Down Expand Up @@ -369,6 +376,7 @@ def align_common_sequence(
protein_atom_array=target_at,
ligand_mols=self.ligand_mols,
add_ligand_hydrogens=self.add_ligand_hydrogens,
skip_3d_confgen=self.skip_3d_confgen,
structure_type=self.structure_type,
)

Expand All @@ -381,6 +389,7 @@ def align_common_sequence(
protein_atom_array=ref_at,
ligand_mols=other.ligand_mols,
add_ligand_hydrogens=other.add_ligand_hydrogens,
skip_3d_confgen=other.skip_3d_confgen,
structure_type=other.structure_type,
)

Expand Down Expand Up @@ -435,6 +444,7 @@ def superimpose(
protein_atom_array=superimposed,
ligand_mols=None if strip_ligands else self.ligand_mols,
add_ligand_hydrogens=self.add_ligand_hydrogens,
skip_3d_confgen=self.skip_3d_confgen,
structure_type=self.structure_type,
),
raw_rmsd,
Expand Down
Loading