Skip to content

Commit d64b4f7

Browse files
committed
Fix up CASP15 single-seq method support
1 parent db43a83 commit d64b4f7

File tree

2 files changed

+32
-15
lines changed

2 files changed

+32
-15
lines changed

posebench/analysis/complex_alignment.py

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -168,7 +168,7 @@ def align_complex_to_protein_only(
168168
save_ligand: bool = True,
169169
aligned_filename_suffix: str = "_aligned",
170170
atom_df_name: str = "ATOM",
171-
):
171+
) -> int:
172172
"""Align a predicted protein-ligand structure to a reference protein structure.
173173
174174
:param predicted_protein_pdb: Path to the predicted protein structure in PDB format
@@ -178,6 +178,7 @@ def align_complex_to_protein_only(
178178
:param save_ligand: Whether to save the aligned ligand structure
179179
:param aligned_filename_suffix: suffix to append to the aligned files
180180
:param atom_df_name: Name of the atom dataframe in the PDB file
181+
:return: 0 if successful, 1 if unsuccessful
181182
"""
182183
from biopandas.pdb import PandasPdb
183184
from rdkit import Chem
@@ -197,14 +198,14 @@ def align_complex_to_protein_only(
197198
logger.warning(
198199
f"Unable to parse predicted protein structure {predicted_protein_pdb} due to the error: {e}. Skipping..."
199200
)
200-
return
201+
return 1
201202
try:
202203
reference_rec = parse_pdb_from_path(reference_protein_pdb)
203204
except Exception as e:
204205
logger.warning(
205206
f"Unable to parse reference protein structure {reference_protein_pdb} due to the error: {e}. Skipping..."
206207
)
207-
return
208+
return 1
208209
if predicted_ligand_sdf is not None:
209210
predicted_ligand = read_molecule(predicted_ligand_sdf, remove_hs=True, sanitize=True)
210211
try:
@@ -215,7 +216,7 @@ def align_complex_to_protein_only(
215216
logger.warning(
216217
f"Unable to extract predicted protein structure coordinates for input {predicted_protein_pdb} due to the error: {e}. Skipping..."
217218
)
218-
return
219+
return 1
219220
try:
220221
reference_calpha_coords = extract_receptor_structure(
221222
reference_rec, None, filter_out_hetero_residues=True
@@ -224,23 +225,23 @@ def align_complex_to_protein_only(
224225
logger.warning(
225226
f"Unable to extract reference protein structure coordinates for input {predicted_protein_pdb} due to the error: {e}. Skipping..."
226227
)
227-
return
228+
return 1
228229
if predicted_ligand_sdf is not None:
229230
try:
230231
predicted_ligand_conf = predicted_ligand.GetConformer()
231232
except Exception as e:
232233
logger.warning(
233234
f"Unable to extract predicted ligand conformer for {predicted_ligand_sdf} due to the error: {e}. Skipping..."
234235
)
235-
return
236+
return 1
236237

237238
if reference_calpha_coords.shape != predicted_calpha_coords.shape:
238239
logger.warning(
239240
f"Receptor structures differ for prediction {predicted_protein_pdb}. Skipping due to shape mismatch:",
240241
reference_calpha_coords.shape,
241242
predicted_calpha_coords.shape,
242243
)
243-
return
244+
return 1
244245

245246
# Perform the alignment
246247
rotation, reference_calpha_centroid, predicted_calpha_centroid = align_prediction(
@@ -287,6 +288,8 @@ def align_complex_to_protein_only(
287288
) as f:
288289
f.write(predicted_ligand)
289290

291+
return 0
292+
290293

291294
@hydra.main(
292295
version_base="1.3",

posebench/models/ensemble_generation.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -839,6 +839,7 @@ def get_method_predictions(
839839
cfg: DictConfig,
840840
binding_site_method: Optional[str] = None,
841841
input_protein_filepath: Optional[str] = None,
842+
is_ss_method: bool = False,
842843
) -> List[Tuple[str, str]]:
843844
"""Get the predictions generated by the method.
844845
@@ -848,11 +849,13 @@ def get_method_predictions(
848849
:param binding_site_method: Optional name of the method used to predict AutoDock Vina's binding
849850
sites.
850851
:param input_protein_filepath: Optional path to the input protein structure PDB file.
852+
:param is_ss_method: Whether the method is a single-sequence method.
851853
:return: List of method predictions, each as a tuple of the output protein filepath and the
852854
output ligand filepath.
853855
"""
854856
pocket_only_suffix = "_pocket_only" if cfg.pocket_only_baseline else ""
855857
no_ilcl_suffix = "_no_ilcl" if cfg.neuralplexer_no_ilcl else ""
858+
single_seq_suffix = "_ss" if is_ss_method else ""
856859

857860
if method == "diffdock":
858861
ensemble_benchmarking_output_dir = (
@@ -981,7 +984,7 @@ def get_method_predictions(
981984
elif method == "neuralplexer":
982985
ensemble_benchmarking_output_dir = (
983986
Path(cfg.input_dir if cfg.input_dir else cfg.neuralplexer_out_path).parent
984-
/ f"neuralplexer{pocket_only_suffix}{no_ilcl_suffix}_{cfg.ensemble_benchmarking_dataset}_outputs_{cfg.ensemble_benchmarking_repeat_index}"
987+
/ f"neuralplexer{single_seq_suffix}{pocket_only_suffix}{no_ilcl_suffix}_{cfg.ensemble_benchmarking_dataset}_outputs_{cfg.ensemble_benchmarking_repeat_index}"
985988
if cfg.ensemble_benchmarking
986989
else (cfg.input_dir if cfg.input_dir else cfg.neuralplexer_out_path)
987990
)
@@ -1078,7 +1081,7 @@ def get_method_predictions(
10781081
elif method == "chai-lab":
10791082
ensemble_benchmarking_output_dir = (
10801083
Path(cfg.input_dir if cfg.input_dir else cfg.chai_out_path).parent
1081-
/ f"chai-lab{pocket_only_suffix}_{cfg.ensemble_benchmarking_dataset}_outputs_{cfg.ensemble_benchmarking_repeat_index}"
1084+
/ f"chai-lab{single_seq_suffix}{pocket_only_suffix}_{cfg.ensemble_benchmarking_dataset}_outputs_{cfg.ensemble_benchmarking_repeat_index}"
10821085
if cfg.ensemble_benchmarking
10831086
else (cfg.input_dir if cfg.input_dir else cfg.chai_out_path)
10841087
)
@@ -1112,7 +1115,7 @@ def get_method_predictions(
11121115
elif method == "alphafold3":
11131116
ensemble_benchmarking_output_dir = (
11141117
Path(cfg.input_dir if cfg.input_dir else cfg.alphafold3_out_path).parent
1115-
/ f"alphafold3{pocket_only_suffix}_{cfg.ensemble_benchmarking_dataset}_outputs_{cfg.ensemble_benchmarking_repeat_index}"
1118+
/ f"alphafold3{single_seq_suffix}{pocket_only_suffix}_{cfg.ensemble_benchmarking_dataset}_outputs_{cfg.ensemble_benchmarking_repeat_index}"
11161119
if cfg.ensemble_benchmarking
11171120
else (cfg.input_dir if cfg.input_dir else cfg.alphafold3_out_path)
11181121
)
@@ -1340,7 +1343,7 @@ def generate_ensemble_predictions(
13401343

13411344
if cfg.resume:
13421345
ensemble_predictions_dict = {}
1343-
for method in cfg.ensemble_methods:
1346+
for method, is_ss_method in zip(cfg.ensemble_methods, cfg.is_ss_ensemble_method):
13441347
if method == "vina":
13451348
for binding_site_method in cfg.vina_binding_site_methods:
13461349
method_predictions = get_method_predictions(
@@ -1349,11 +1352,16 @@ def generate_ensemble_predictions(
13491352
cfg,
13501353
binding_site_method=binding_site_method,
13511354
input_protein_filepath=protein_filepath,
1355+
is_ss_method=is_ss_method,
13521356
)
13531357
ensemble_predictions_dict[f"vina_{binding_site_method}"] = method_predictions
13541358
else:
13551359
method_predictions = get_method_predictions(
1356-
method, input_id, cfg, input_protein_filepath=protein_filepath
1360+
method,
1361+
input_id,
1362+
cfg,
1363+
input_protein_filepath=protein_filepath,
1364+
is_ss_method=is_ss_method,
13571365
)
13581366
ensemble_predictions_dict[method] = method_predictions
13591367

@@ -1570,11 +1578,16 @@ def rank_ensemble_predictions(
15701578
and apo_reference_protein_filepath is not None
15711579
):
15721580
try:
1573-
align_complex_to_protein_only(
1581+
alignment_return_code = align_complex_to_protein_only(
15741582
protein_filepath, ligand_filepath, apo_reference_protein_filepath
15751583
)
1576-
protein_filepath = protein_filepath.replace(".pdb", "_aligned.pdb")
1577-
ligand_filepath = ligand_filepath.replace(".sdf", "_aligned.sdf")
1584+
if alignment_return_code == 0:
1585+
protein_filepath = protein_filepath.replace(".pdb", "_aligned.pdb")
1586+
ligand_filepath = ligand_filepath.replace(".sdf", "_aligned.sdf")
1587+
else:
1588+
logger.warning(
1589+
f"Failed to align predicted complex structure {protein_filepath} and ligand structure {ligand_filepath} to the apo protein structure {apo_reference_protein_filepath} from method {method}. Skipping alignment..."
1590+
)
15781591
except Exception as e:
15791592
logger.warning(
15801593
f"Failed to align protein-ligand complex {protein_filepath} and {ligand_filepath} to apo protein structure {apo_reference_protein_filepath}. Skipping alignment due to: {e}"
@@ -2247,6 +2260,7 @@ def main(cfg: DictConfig):
22472260
with open_dict(cfg):
22482261
# NOTE: besides their output directories, single-sequence baselines are treated like their multi-sequence counterparts
22492262
output_dir = copy.deepcopy(cfg.output_dir)
2263+
cfg.is_ss_ensemble_method = [s.endswith("_ss") for s in cfg.ensemble_methods]
22502264
cfg.ensemble_methods = [s.removesuffix("_ss") for s in cfg.ensemble_methods]
22512265
cfg.output_dir = output_dir
22522266

0 commit comments

Comments
 (0)