Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions openfold3/core/data/framework/single_datasets/base_of3.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,10 @@ def __init__(self, dataset_config) -> None:
)
self.datapoint_cache = {}

# CCD - only used if template structures are not preprocessed
if dataset_config.dataset_paths.template_structure_array_directory is not None:
self.ccd = None
else:
if dataset_config.dataset_paths.template_structures_directory is not None:
self.ccd = pdbx.CIFFile.read(dataset_config.dataset_paths.ccd_file)
else:
self.ccd = None

# Dataset configuration
# n_tokens can be set in the getitem method separately for each sample using
Expand Down
13 changes: 10 additions & 3 deletions openfold3/core/data/framework/single_datasets/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,16 @@ def create_datapoint_cache(self):
pdb_ids = list(self.dataset_cache.structure_data.keys())

def null_safe_token_count(x):
token_count = self.dataset_cache.structure_data[x].token_count
elem = self.dataset_cache.structure_data[x]
token_count = elem.token_count if hasattr(elem, "token_count") else None
return token_count if token_count is not None else 0

pdb_ids = sorted(
pdb_ids,
key=null_safe_token_count,
reverse=False,
)

_datapoint_cache = pd.DataFrame({"pdb_id": pdb_ids})
self.datapoint_cache = pad_to_world_size(_datapoint_cache, self.world_size)

Expand Down Expand Up @@ -186,15 +189,19 @@ def get_validation_homology_features(self, pdb_id: str, sample_data: dict) -> di

structure_entry = self.dataset_cache.structure_data[pdb_id]

def _use_metrics(x):
"""Check if the chain or interface should be used for metrics."""
return x.use_metrics if hasattr(x, "use_metrics") else True

chains_for_intra_metrics = [
int(cid)
for cid, cdata in structure_entry.chains.items()
if cdata.use_metrics
if _use_metrics(cdata)
]

interfaces_to_include = []
for interface_id, cluster_data in structure_entry.interfaces.items():
if cluster_data.use_metrics:
if _use_metrics(cluster_data):
interface_chains = tuple(int(ci) for ci in interface_id.split("_"))
interfaces_to_include.append(interface_chains)

Expand Down
5 changes: 4 additions & 1 deletion openfold3/core/data/primitives/structure/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ def sample_templates(
dict[str, TemplateCacheEntry] | dict[None]:
The sampled template data per chain given chain.
"""
if not template_structure_array_directory and not template_cache_directory:
return {}

chain_data = assembly_data[chain_id]
template_ids = chain_data["template_ids"]
if not template_ids:
Expand Down Expand Up @@ -200,7 +203,7 @@ def sample_templates(
else:
k = np.min([np.random.randint(0, l), n_templates])

if k > 0:
if (k > 0) and (template_cache_directory is not None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could the parentheses around the individual comparisons be removed? I believe the redundant parentheses will cause an issue with our ruff formatter.

# Load template cache entry numpy file
# From the representative ID during training
if "alignment_representative_id" in chain_data:
Expand Down
3 changes: 2 additions & 1 deletion openfold3/entry_points/experiment_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,8 @@ def manual_load_checkpoint(self):
self.lightning_module.load_state_dict(
state_dict, strict=self.ckpt_load_settings.strict_loading
)
self.lightning_module.ema.load_state_dict(ckpt["ema"])
if "ema" in ckpt:
self.lightning_module.ema.load_state_dict(ckpt["ema"])

if self.ckpt_load_settings.restore_lr_scheduler:
last_global_step = int(ckpt["global_step"])
Expand Down
4 changes: 4 additions & 0 deletions openfold3/entry_points/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,12 @@ def _maybe_download_parameters(target_path: Path) -> None:
class CheckpointConfig(BaseModel):
"""Settings for training checkpoint writing."""

model_config = PydanticConfigDict(extra="allow")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is extra="allow" required to support backwards compatibility with old versions of the code?

We generally prefer to set extra="forbid" to help catch instances where a field might be ignored if it is not recognized by the Model (e.g. in the case of a typo or a new field).

monitor: str | None = None
every_n_epochs: int = 1
auto_insert_metric_name: bool = False
filename: str | None = None
enable_version_counter: bool = True
save_last: bool = True
save_top_k: int = -1

Expand Down
5 changes: 3 additions & 2 deletions openfold3/projects/of3_all_atom/config/dataset_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,13 @@ def _validate_exactly_one_path_exists(
group_name: str, path_values: list[Path | None]
):
which_paths_exist = [p is not None for p in path_values]
if sum(which_paths_exist) != 1:
if sum(which_paths_exist) > 1:
existing_paths = [
p for p, b in zip(path_values, which_paths_exist, strict=True) if b
]
raise ValueError(
f"Exactly one path in set of {group_name} should exist."
"If there is a template folder, "
f"exactly one path in set of {group_name} should exist."
f"Found {existing_paths} exist."
)

Expand Down