diff --git a/clinicadl/API_test.py b/clinicadl/API_test.py new file mode 100644 index 000000000..a7240c92d --- /dev/null +++ b/clinicadl/API_test.py @@ -0,0 +1,17 @@ +from clinicadl.caps_dataset.caps_dataset_config import CapsDatasetConfig +from clinicadl.prepare_data.prepare_data import DeepLearningPrepareData +from clinicadl.trainer.config.classification import ClassificationConfig +from clinicadl.trainer.trainer import Trainer +from clinicadl.utils.enum import ExtractionMethod, Preprocessing, Task +from clinicadl.utils.iotools.train_utils import merge_cli_and_config_file_options + +image_config = CapsDatasetConfig.from_preprocessing_and_extraction_method( + extraction=ExtractionMethod.IMAGE, + preprocessing_type=Preprocessing.T1_LINEAR, +) + +DeepLearningPrepareData(image_config) + +config = ClassificationConfig() +trainer = Trainer(config) +trainer.train(split_list=config.cross_validation.split, overwrite=True) diff --git a/clinicadl/maps_manager/maps_manager.py b/clinicadl/maps_manager/maps_manager.py index 73b6430eb..3b32486b5 100644 --- a/clinicadl/maps_manager/maps_manager.py +++ b/clinicadl/maps_manager/maps_manager.py @@ -7,8 +7,6 @@ import pandas as pd import torch -import torch.distributed as dist -from torch.amp import autocast from clinicadl.caps_dataset.caps_dataset_utils import read_json from clinicadl.caps_dataset.data import ( @@ -17,7 +15,6 @@ from clinicadl.metrics.metric_module import MetricModule from clinicadl.metrics.utils import ( check_selection_metric, - find_selection_metrics, ) from clinicadl.predict.utils import get_prediction from clinicadl.trainer.tasks_utils import ( @@ -25,8 +22,6 @@ evaluation_metrics, generate_label_code, output_size, - test, - test_da, ) from clinicadl.transforms.config import TransformsConfig from clinicadl.utils import cluster @@ -149,274 +144,6 @@ def __getattr__(self, name): ################################### # High-level functions templates # ################################### - def _test_loader( - self, 
- dataloader, - criterion, - data_group: str, - split: int, - selection_metrics, - use_labels=True, - gpu=None, - amp=False, - network=None, - report_ci=True, - ): - """ - Launches the testing task on a dataset wrapped by a DataLoader and writes prediction TSV files. - - Args: - dataloader (torch.utils.data.DataLoader): DataLoader wrapping the test CapsDataset. - criterion (torch.nn.modules.loss._Loss): optimization criterion used during training. - data_group (str): name of the data group used for the testing task. - split (int): Index of the split used to train the model tested. - selection_metrics (list[str]): List of metrics used to select the best models which are tested. - use_labels (bool): If True, the labels must exist in test meta-data and metrics are computed. - gpu (bool): If given, a new value for the device of the model will be computed. - amp (bool): If enabled, uses Automatic Mixed Precision (requires GPU usage). - network (int): Index of the network tested (only used in multi-network setting). 
- """ - for selection_metric in selection_metrics: - if cluster.master: - log_dir = ( - self.maps_path - / f"{self.split_name}-{split}" - / f"best-{selection_metric}" - / data_group - ) - self.write_description_log( - log_dir, - data_group, - dataloader.dataset.config.data.caps_dict, - dataloader.dataset.config.data.data_df, - ) - - # load the best trained model during the training - model, _ = self._init_model( - transfer_path=self.maps_path, - split=split, - transfer_selection=selection_metric, - gpu=gpu, - network=network, - ) - model = DDP(model, fsdp=self.fully_sharded_data_parallel, amp=self.amp) - - prediction_df, metrics = test( - mode=self.mode, - metrics_module=self.metrics_module, - n_classes=self.n_classes, - network_task=self.network_task, - model=model, - dataloader=dataloader, - criterion=criterion, - use_labels=use_labels, - amp=amp, - report_ci=report_ci, - ) - if use_labels: - if network is not None: - metrics[f"{self.mode}_id"] = network - - loss_to_log = ( - metrics["Metric_values"][-1] if report_ci else metrics["loss"] - ) - - logger.info( - f"{self.mode} level {data_group} loss is {loss_to_log} for model selected on {selection_metric}" - ) - - if cluster.master: - # Replace here - self._mode_level_to_tsv( - prediction_df, - metrics, - split, - selection_metric, - data_group=data_group, - ) - - def _test_loader_ssda( - self, - dataloader, - criterion, - alpha, - data_group, - split, - selection_metrics, - use_labels=True, - gpu=None, - network=None, - target=False, - report_ci=True, - ): - """ - Launches the testing task on a dataset wrapped by a DataLoader and writes prediction TSV files. - - Args: - dataloader (torch.utils.data.DataLoader): DataLoader wrapping the test CapsDataset. - criterion (torch.nn.modules.loss._Loss): optimization criterion used during training. - data_group (str): name of the data group used for the testing task. - split (int): Index of the split used to train the model tested. 
- selection_metrics (list[str]): List of metrics used to select the best models which are tested. - use_labels (bool): If True, the labels must exist in test meta-data and metrics are computed. - gpu (bool): If given, a new value for the device of the model will be computed. - network (int): Index of the network tested (only used in multi-network setting). - """ - for selection_metric in selection_metrics: - log_dir = ( - self.maps_path - / f"{self.split_name}-{split}" - / f"best-{selection_metric}" - / data_group - ) - self.write_description_log( - log_dir, - data_group, - dataloader.dataset.caps_dict, - dataloader.dataset.df, - ) - - # load the best trained model during the training - model, _ = self._init_model( - transfer_path=self.maps_path, - split=split, - transfer_selection=selection_metric, - gpu=gpu, - network=network, - ) - prediction_df, metrics = test_da( - self.network_task, - model, - dataloader, - criterion, - target=target, - report_ci=report_ci, - ) - if use_labels: - if network is not None: - metrics[f"{self.mode}_id"] = network - - if report_ci: - loss_to_log = metrics["Metric_values"][-1] - else: - loss_to_log = metrics["loss"] - - logger.info( - f"{self.mode} level {data_group} loss is {loss_to_log} for model selected on {selection_metric}" - ) - - # Replace here - self._mode_level_to_tsv( - prediction_df, metrics, split, selection_metric, data_group=data_group - ) - - @torch.no_grad() - def _compute_output_tensors( - self, - dataset, - data_group, - split, - selection_metrics, - nb_images=None, - gpu=None, - network=None, - ): - """ - Compute the output tensors and saves them in the MAPS. - - Args: - dataset (clinicadl.caps_dataset.data.CapsDataset): wrapper of the data set. - data_group (str): name of the data group used for the task. - split (int): split number. - selection_metrics (list[str]): metrics used for model selection. - nb_images (int): number of full images to write. Default computes the outputs of the whole data set. 
- gpu (bool): If given, a new value for the device of the model will be computed. - network (int): Index of the network tested (only used in multi-network setting). - """ - for selection_metric in selection_metrics: - # load the best trained model during the training - model, _ = self._init_model( - transfer_path=self.maps_path, - split=split, - transfer_selection=selection_metric, - gpu=gpu, - network=network, - nb_unfrozen_layer=self.nb_unfrozen_layer, - ) - model = DDP(model, fsdp=self.fully_sharded_data_parallel, amp=self.amp) - model.eval() - - tensor_path = ( - self.maps_path - / f"{self.split_name}-{split}" - / f"best-{selection_metric}" - / data_group - / "tensors" - ) - if cluster.master: - tensor_path.mkdir(parents=True, exist_ok=True) - dist.barrier() - - if nb_images is None: # Compute outputs for the whole data set - nb_modes = len(dataset) - else: - nb_modes = nb_images * dataset.elem_per_image - - for i in [ - *range(cluster.rank, nb_modes, cluster.world_size), - *range(int(nb_modes % cluster.world_size <= cluster.rank)), - ]: - data = dataset[i] - image = data["image"] - x = image.unsqueeze(0).to(model.device) - with autocast("cuda", enabled=self.std_amp): - output = model(x) - output = output.squeeze(0).cpu().float() - participant_id = data["participant_id"] - session_id = data["session_id"] - mode_id = data[f"{self.mode}_id"] - input_filename = ( - f"{participant_id}_{session_id}_{self.mode}-{mode_id}_input.pt" - ) - output_filename = ( - f"{participant_id}_{session_id}_{self.mode}-{mode_id}_output.pt" - ) - torch.save(image, tensor_path / input_filename) - torch.save(output, tensor_path / output_filename) - logger.debug(f"File saved at {[input_filename, output_filename]}") - - def _ensemble_prediction( - self, - data_group, - split, - selection_metrics, - use_labels=True, - skip_leak_check=False, - ): - """Computes the results on the image-level.""" - - if not selection_metrics: - selection_metrics = find_selection_metrics( - self.maps_path, 
self.split_name, split - ) - - for selection_metric in selection_metrics: - ##################### - # Soft voting - if self.num_networks > 1 and not skip_leak_check: - self._ensemble_to_tsv( - split, - selection=selection_metric, - data_group=data_group, - use_labels=use_labels, - ) - elif self.mode != "image" and not skip_leak_check: - self._mode_to_image_tsv( - split, - selection=selection_metric, - data_group=data_group, - use_labels=use_labels, - ) ############################### # Checks # diff --git a/clinicadl/predict/predict_manager.py b/clinicadl/predict/predict_manager.py index 879ef0e54..c197a96de 100644 --- a/clinicadl/predict/predict_manager.py +++ b/clinicadl/predict/predict_manager.py @@ -29,6 +29,7 @@ ClinicaDLDataLeakageError, MAPSError, ) +from clinicadl.validator.validator import Validator logger = getLogger("clinicadl.predict_manager") level_list: List[str] = ["warning", "info", "debug"] @@ -38,6 +39,7 @@ class PredictManager: def __init__(self, _config: Union[PredictConfig, InterpretConfig]) -> None: self.maps_manager = MapsManager(_config.maps_dir) self._config = _config + self.validator = Validator() def predict( self, @@ -183,7 +185,8 @@ def predict( split_selection_metrics, ) if cluster.master: - self.maps_manager._ensemble_prediction( + self.validator._ensemble_prediction( + self.maps_manager, self._config.data_group, split, self._config.selection_metrics, @@ -288,12 +291,13 @@ def _predict_multi( if self._config.n_proc is not None else self.maps_manager.n_proc, ) - self.maps_manager._test_loader( - test_loader, - criterion, - self._config.data_group, - split, - split_selection_metrics, + self.validator._test_loader( + maps_manager=self.maps_manager, + dataloader=test_loader, + criterion=criterion, + data_group=self._config.data_group, + split=split, + selection_metrics=split_selection_metrics, use_labels=self._config.use_labels, gpu=self._config.gpu, amp=self._config.amp, @@ -301,7 +305,8 @@ def _predict_multi( ) if 
self._config.save_tensor: logger.debug("Saving tensors") - self.maps_manager._compute_output_tensors( + self.validator._compute_output_tensors( + self.maps_manager, data_test, self._config.data_group, split, @@ -416,7 +421,8 @@ def _predict_single( if self._config.n_proc is not None else self.maps_manager.n_proc, ) - self.maps_manager._test_loader( + self.validator._test_loader( + self.maps_manager, test_loader, criterion, self._config.data_group, @@ -428,7 +434,8 @@ def _predict_single( ) if self._config.save_tensor: logger.debug("Saving tensors") - self.maps_manager._compute_output_tensors( + self.validator._compute_output_tensors( + self.maps_manager, data_test, self._config.data_group, split, diff --git a/clinicadl/trainer/tasks_utils.py b/clinicadl/trainer/tasks_utils.py index 93a652aa8..dc28d0acd 100644 --- a/clinicadl/trainer/tasks_utils.py +++ b/clinicadl/trainer/tasks_utils.py @@ -207,175 +207,6 @@ def evaluation_metrics(network_task: Union[str, Task]): raise ValueError("Unknown network task") -def test( - mode: str, - metrics_module: MetricModule, - n_classes: int, - network_task, - model: Network, - dataloader: DataLoader, - criterion: _Loss, - use_labels: bool = True, - amp: bool = False, - report_ci=False, -) -> Tuple[pd.DataFrame, Dict[str, float]]: - """ - Computes the predictions and evaluation metrics. - - Parameters - ---------- - model: Network - The model trained. - dataloader: DataLoader - Wrapper of a CapsDataset. - criterion: _Loss - Function to calculate the loss. - use_labels: bool - If True the true_label will be written in output DataFrame - and metrics dict will be created. - amp: bool - If True, enables Pytorch's automatic mixed precision. - - Returns - ------- - the results and metrics on the image level. 
- """ - model.eval() - dataloader.dataset.eval() - - results_df = pd.DataFrame(columns=columns(network_task, mode, n_classes)) - total_loss = {} - with torch.no_grad(): - for i, data in enumerate(dataloader): - # initialize the loss list to save the loss components - with autocast("cuda", enabled=amp): - outputs, loss_dict = model(data, criterion, use_labels=use_labels) - - if i == 0: - for loss_component in loss_dict.keys(): - total_loss[loss_component] = 0 - for loss_component in total_loss.keys(): - total_loss[loss_component] += loss_dict[loss_component].float() - - # Generate detailed DataFrame - for idx in range(len(data["participant_id"])): - row = generate_test_row( - network_task, - mode, - metrics_module, - n_classes, - idx, - data, - outputs.float(), - ) - row_df = pd.DataFrame( - row, columns=columns(network_task, mode, n_classes) - ) - results_df = pd.concat([results_df, row_df]) - - del outputs, loss_dict - dataframes = [None] * dist.get_world_size() - dist.gather_object(results_df, dataframes if dist.get_rank() == 0 else None, dst=0) - if dist.get_rank() == 0: - results_df = pd.concat(dataframes) - del dataframes - results_df.reset_index(inplace=True, drop=True) - - if not use_labels: - metrics_dict = None - else: - metrics_dict = compute_metrics( - network_task, results_df, metrics_module, report_ci=report_ci - ) - for loss_component in total_loss.keys(): - dist.reduce(total_loss[loss_component], dst=0) - loss_value = total_loss[loss_component].item() / cluster.world_size - - if report_ci: - metrics_dict["Metric_names"].append(loss_component) - metrics_dict["Metric_values"].append(loss_value) - metrics_dict["Lower_CI"].append("N/A") - metrics_dict["Upper_CI"].append("N/A") - metrics_dict["SE"].append("N/A") - - else: - metrics_dict[loss_component] = loss_value - - torch.cuda.empty_cache() - - return results_df, metrics_dict - - -def test_da( - mode: str, - metrics_module: MetricModule, - n_classes: int, - network_task: Union[str, Task], - model: 
Network, - dataloader: DataLoader, - criterion: _Loss, - alpha: float = 0, - use_labels: bool = True, - target: bool = True, - report_ci=False, -) -> Tuple[pd.DataFrame, Dict[str, float]]: - """ - Computes the predictions and evaluation metrics. - - Args: - model: the model trained. - dataloader: wrapper of a CapsDataset. - criterion: function to calculate the loss. - use_labels: If True the true_label will be written in output DataFrame - and metrics dict will be created. - Returns: - the results and metrics on the image level. - """ - model.eval() - dataloader.dataset.eval() - results_df = pd.DataFrame(columns=columns(network_task, mode, n_classes)) - total_loss = 0 - with torch.no_grad(): - for i, data in enumerate(dataloader): - outputs, loss_dict = model.compute_outputs_and_loss_test( - data, criterion, alpha, target - ) - total_loss += loss_dict["loss"].item() - - # Generate detailed DataFrame - for idx in range(len(data["participant_id"])): - row = generate_test_row( - network_task, mode, metrics_module, n_classes, idx, data, outputs - ) - row_df = pd.DataFrame( - row, columns=columns(network_task, mode, n_classes) - ) - results_df = pd.concat([results_df, row_df]) - - del outputs, loss_dict - results_df.reset_index(inplace=True, drop=True) - - if not use_labels: - metrics_dict = None - else: - metrics_dict = compute_metrics( - network_task, results_df, metrics_module, report_ci=report_ci - ) - if report_ci: - metrics_dict["Metric_names"].append("loss") - metrics_dict["Metric_values"].append(total_loss) - metrics_dict["Lower_CI"].append("N/A") - metrics_dict["Upper_CI"].append("N/A") - metrics_dict["SE"].append("N/A") - - else: - metrics_dict["loss"] = total_loss - - torch.cuda.empty_cache() - - return results_df, metrics_dict - - def columns(network_task: Union[str, Task], mode: str, n_classes: Optional[int] = None): """ List of the columns' names in the TSV file containing the predictions. 
diff --git a/clinicadl/trainer/trainer.py b/clinicadl/trainer/trainer.py index 16d2d88d6..3c279d155 100644 --- a/clinicadl/trainer/trainer.py +++ b/clinicadl/trainer/trainer.py @@ -33,6 +33,7 @@ patch_to_read_json, ) from clinicadl.trainer.tasks_utils import create_training_config +from clinicadl.validator.validator import Validator if TYPE_CHECKING: from clinicadl.callbacks.callbacks import Callback @@ -43,8 +44,6 @@ generate_sampler, get_criterion, save_outputs, - test, - test_da, ) logger = getLogger("clinicadl.trainer") @@ -64,6 +63,7 @@ def __init__( """ self.config = config self.maps_manager = self._init_maps_manager(config) + self.validator = Validator() self._check_args() def _init_maps_manager(self, config) -> MapsManager: @@ -371,12 +371,14 @@ def _train_single( ) if cluster.master: - self.maps_manager._ensemble_prediction( + self.validator._ensemble_prediction( + self.maps_manager, "train", split, self.config.validation.selection_metrics, ) - self.maps_manager._ensemble_prediction( + self.validator._ensemble_prediction( + self.maps_manager, "validation", split, self.config.validation.selection_metrics, @@ -495,12 +497,14 @@ def _train_multi( resume = False if cluster.master: - self.maps_manager._ensemble_prediction( + self.validator._ensemble_prediction( + self.maps_manager, "train", split, self.config.validation.selection_metrics, ) - self.maps_manager._ensemble_prediction( + self.validator._ensemble_prediction( + self.maps_manager, "validation", split, self.config.validation.selection_metrics, @@ -706,12 +710,14 @@ def _train_ssda( resume=resume, ) - self.maps_manager._ensemble_prediction( + self.validator._ensemble_prediction( + self.maps_manager, "train", split, self.config.validation.selection_metrics, ) - self.maps_manager._ensemble_prediction( + self.validator._ensemble_prediction( + self.maps_manager, "validation", split, self.config.validation.selection_metrics, @@ -861,7 +867,7 @@ def _train( ): evaluation_flag = False - _, metrics_train = 
test( + _, metrics_train = self.validator.test( mode=self.maps_manager.mode, metrics_module=self.maps_manager.metrics_module, n_classes=self.maps_manager.n_classes, @@ -871,7 +877,7 @@ def _train( criterion=criterion, amp=self.maps_manager.std_amp, ) - _, metrics_valid = test( + _, metrics_valid = self.validator.test( mode=self.maps_manager.mode, metrics_module=self.maps_manager.metrics_module, n_classes=self.maps_manager.n_classes, @@ -928,7 +934,7 @@ def _train( model.zero_grad(set_to_none=True) logger.debug(f"Last checkpoint at the end of the epoch {epoch}") - _, metrics_train = test( + _, metrics_train = self.validator.test( mode=self.maps_manager.mode, metrics_module=self.maps_manager.metrics_module, n_classes=self.maps_manager.n_classes, @@ -938,7 +944,7 @@ def _train( criterion=criterion, amp=self.maps_manager.std_amp, ) - _, metrics_valid = test( + _, metrics_valid = self.validator.test( mode=self.maps_manager.mode, metrics_module=self.maps_manager.metrics_module, n_classes=self.maps_manager.n_classes, @@ -998,7 +1004,8 @@ def _train( epoch += 1 del model - self.maps_manager._test_loader( + self.validator._test_loader( + self.maps_manager, train_loader, criterion, "train", @@ -1007,7 +1014,8 @@ def _train( amp=self.maps_manager.std_amp, network=network, ) - self.maps_manager._test_loader( + self.validator._test_loader( + self.maps_manager, valid_loader, criterion, "validation", @@ -1018,7 +1026,8 @@ def _train( ) if save_outputs(self.maps_manager.network_task): - self.maps_manager._compute_output_tensors( + self.validator._compute_output_tensors( + self.maps_manager, train_loader.dataset, "train", split, @@ -1026,7 +1035,8 @@ def _train( nb_images=1, network=network, ) - self.maps_manager._compute_output_tensors( + self.validator._compute_output_tensors( + self.maps_manager, valid_loader.dataset, "validation", split, @@ -1400,7 +1410,8 @@ def _train_ssdann( epoch += 1 - self.maps_manager._test_loader_ssda( + self.validator._test_loader_ssda( + 
self.maps_manager, train_target_loader, criterion, data_group="train", @@ -1410,7 +1421,8 @@ def _train_ssdann( target=True, alpha=0, ) - self.maps_manager._test_loader_ssda( + self.validator._test_loader_ssda( + self.maps_manager, valid_loader, criterion, data_group="validation", @@ -1422,7 +1434,8 @@ def _train_ssdann( ) if save_outputs(self.maps_manager.network_task): - self.maps_manager._compute_output_tensors( + self.validator._compute_output_tensors( + self.maps_manager, train_target_loader.dataset, "train", split, @@ -1430,7 +1443,8 @@ def _train_ssdann( nb_images=1, network=network, ) - self.maps_manager._compute_output_tensors( + self.validator._compute_output_tensors( + self.maps_manager, train_target_loader.dataset, "validation", split, diff --git a/clinicadl/validator/config.py b/clinicadl/validator/config.py new file mode 100644 index 000000000..165b36dd0 --- /dev/null +++ b/clinicadl/validator/config.py @@ -0,0 +1,48 @@ +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Optional, Union + +from pydantic import ( + BaseModel, + ConfigDict, + computed_field, + field_validator, +) + +from clinicadl.utils.factories import DefaultFromLibrary + + +class ValidatorConfig(BaseModel): + """Base config class to configure the validator.""" + + maps_path: Path + mode: str + network_task: str + split_name: Optional[str] = None + num_networks: Optional[int] = None + fsdp: Optional[bool] = None + amp: Optional[bool] = None + metrics_module: Optional = None + n_classes: Optional[int] = None + nb_unfrozen_layers: Optional[int] = None + std_amp: Optional[bool] = None + + # pydantic config + model_config = ConfigDict( + validate_assignment=True, + use_enum_values=True, + validate_default=True, + ) + + @computed_field + @property + @abstractmethod + def metric(self) -> str: + """The name of the metric.""" + + @field_validator("get_not_nans", mode="after") + @classmethod + def validator_get_not_nans(cls, v): + assert not v, "get_not_nans not 
supported in ClinicaDL. Please set to False." + + return v diff --git a/clinicadl/validator/validator.py b/clinicadl/validator/validator.py new file mode 100644 index 000000000..d55810299 --- /dev/null +++ b/clinicadl/validator/validator.py @@ -0,0 +1,498 @@ +from logging import getLogger +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union + +import numpy as np +import pandas as pd +import torch +import torch.distributed as dist +from torch.amp import autocast +from torch.nn.modules.loss import _Loss +from torch.utils.data import DataLoader + +from clinicadl.maps_manager.maps_manager import MapsManager +from clinicadl.metrics.metric_module import MetricModule +from clinicadl.metrics.utils import find_selection_metrics +from clinicadl.network.network import Network +from clinicadl.trainer.tasks_utils import columns, compute_metrics, generate_test_row +from clinicadl.utils import cluster +from clinicadl.utils.computational.ddp import DDP, init_ddp +from clinicadl.utils.enum import ( + ClassificationLoss, + ClassificationMetric, + ReconstructionLoss, + ReconstructionMetric, + RegressionLoss, + RegressionMetric, + Task, +) +from clinicadl.utils.exceptions import ( + ClinicaDLArgumentError, + ClinicaDLConfigurationError, + MAPSError, +) + +logger = getLogger("clinicadl.maps_manager") +level_list: List[str] = ["warning", "info", "debug"] + + +# TODO save weights on CPU for better compatibility + + +class Validator: + def test( + self, + mode: str, + metrics_module: MetricModule, + n_classes: int, + network_task, + model: Network, + dataloader: DataLoader, + criterion: _Loss, + use_labels: bool = True, + amp: bool = False, + report_ci=False, + ) -> Tuple[pd.DataFrame, Dict[str, float]]: + """ + Computes the predictions and evaluation metrics. + + Parameters + ---------- + model: Network + The model trained. + dataloader: DataLoader + Wrapper of a CapsDataset. + criterion: _Loss + Function to calculate the loss. 
+ use_labels: bool + If True the true_label will be written in output DataFrame + and metrics dict will be created. + amp: bool + If True, enables Pytorch's automatic mixed precision. + + Returns + ------- + the results and metrics on the image level. + """ + model.eval() + dataloader.dataset.eval() + + results_df = pd.DataFrame(columns=columns(network_task, mode, n_classes)) + total_loss = {} + with torch.no_grad(): + for i, data in enumerate(dataloader): + # initialize the loss list to save the loss components + with autocast("cuda", enabled=amp): + outputs, loss_dict = model(data, criterion, use_labels=use_labels) + + if i == 0: + for loss_component in loss_dict.keys(): + total_loss[loss_component] = 0 + for loss_component in total_loss.keys(): + total_loss[loss_component] += loss_dict[loss_component].float() + + # Generate detailed DataFrame + for idx in range(len(data["participant_id"])): + row = generate_test_row( + network_task, + mode, + metrics_module, + n_classes, + idx, + data, + outputs.float(), + ) + row_df = pd.DataFrame( + row, columns=columns(network_task, mode, n_classes) + ) + results_df = pd.concat([results_df, row_df]) + + del outputs, loss_dict + dataframes = [None] * dist.get_world_size() + dist.gather_object( + results_df, dataframes if dist.get_rank() == 0 else None, dst=0 + ) + if dist.get_rank() == 0: + results_df = pd.concat(dataframes) + del dataframes + results_df.reset_index(inplace=True, drop=True) + + if not use_labels: + metrics_dict = None + else: + metrics_dict = compute_metrics( + network_task, results_df, metrics_module, report_ci=report_ci + ) + for loss_component in total_loss.keys(): + dist.reduce(total_loss[loss_component], dst=0) + loss_value = total_loss[loss_component].item() / cluster.world_size + + if report_ci: + metrics_dict["Metric_names"].append(loss_component) + metrics_dict["Metric_values"].append(loss_value) + metrics_dict["Lower_CI"].append("N/A") + metrics_dict["Upper_CI"].append("N/A") + 
metrics_dict["SE"].append("N/A") + + else: + metrics_dict[loss_component] = loss_value + + torch.cuda.empty_cache() + + return results_df, metrics_dict + + def test_da( + self, + mode: str, + metrics_module: MetricModule, + n_classes: int, + network_task: Union[str, Task], + model: Network, + dataloader: DataLoader, + criterion: _Loss, + alpha: float = 0, + use_labels: bool = True, + target: bool = True, + report_ci=False, + ) -> Tuple[pd.DataFrame, Dict[str, float]]: + """ + Computes the predictions and evaluation metrics. + + Args: + model: the model trained. + dataloader: wrapper of a CapsDataset. + criterion: function to calculate the loss. + use_labels: If True the true_label will be written in output DataFrame + and metrics dict will be created. + Returns: + the results and metrics on the image level. + """ + model.eval() + dataloader.dataset.eval() + results_df = pd.DataFrame(columns=columns(network_task, mode, n_classes)) + total_loss = 0 + with torch.no_grad(): + for i, data in enumerate(dataloader): + outputs, loss_dict = model.compute_outputs_and_loss_test( + data, criterion, alpha, target + ) + total_loss += loss_dict["loss"].item() + + # Generate detailed DataFrame + for idx in range(len(data["participant_id"])): + row = generate_test_row( + network_task, + mode, + metrics_module, + n_classes, + idx, + data, + outputs, + ) + row_df = pd.DataFrame( + row, columns=columns(network_task, mode, n_classes) + ) + results_df = pd.concat([results_df, row_df]) + + del outputs, loss_dict + results_df.reset_index(inplace=True, drop=True) + + if not use_labels: + metrics_dict = None + else: + metrics_dict = compute_metrics( + network_task, results_df, metrics_module, report_ci=report_ci + ) + if report_ci: + metrics_dict["Metric_names"].append("loss") + metrics_dict["Metric_values"].append(total_loss) + metrics_dict["Lower_CI"].append("N/A") + metrics_dict["Upper_CI"].append("N/A") + metrics_dict["SE"].append("N/A") + + else: + metrics_dict["loss"] = total_loss + + 
torch.cuda.empty_cache() + + return results_df, metrics_dict + + def _test_loader( + self, + maps_manager: MapsManager, + dataloader, + criterion, + data_group: str, + split: int, + selection_metrics, + use_labels=True, + gpu=None, + amp=False, + network=None, + report_ci=True, + ): + """ + Launches the testing task on a dataset wrapped by a DataLoader and writes prediction TSV files. + + Args: + dataloader (torch.utils.data.DataLoader): DataLoader wrapping the test CapsDataset. + criterion (torch.nn.modules.loss._Loss): optimization criterion used during training. + data_group (str): name of the data group used for the testing task. + split (int): Index of the split used to train the model tested. + selection_metrics (list[str]): List of metrics used to select the best models which are tested. + use_labels (bool): If True, the labels must exist in test meta-data and metrics are computed. + gpu (bool): If given, a new value for the device of the model will be computed. + amp (bool): If enabled, uses Automatic Mixed Precision (requires GPU usage). + network (int): Index of the network tested (only used in multi-network setting). 
+ """ + for selection_metric in selection_metrics: + if cluster.master: + log_dir = ( + maps_manager.maps_path + / f"{maps_manager.split_name}-{split}" + / f"best-{selection_metric}" + / data_group + ) + maps_manager.write_description_log( + log_dir, + data_group, + dataloader.dataset.config.data.caps_dict, + dataloader.dataset.config.data.data_df, + ) + + # load the best trained model during the training + model, _ = maps_manager._init_model( + transfer_path=maps_manager.maps_path, + split=split, + transfer_selection=selection_metric, + gpu=gpu, + network=network, + ) + model = DDP( + model, + fsdp=maps_manager.fully_sharded_data_parallel, + amp=maps_manager.amp, + ) + + prediction_df, metrics = self.test( + mode=maps_manager.mode, + metrics_module=maps_manager.metrics_module, + n_classes=maps_manager.n_classes, + network_task=maps_manager.network_task, + model=model, + dataloader=dataloader, + criterion=criterion, + use_labels=use_labels, + amp=amp, + report_ci=report_ci, + ) + if use_labels: + if network is not None: + metrics[f"{maps_manager.mode}_id"] = network + + loss_to_log = ( + metrics["Metric_values"][-1] if report_ci else metrics["loss"] + ) + + logger.info( + f"{maps_manager.mode} level {data_group} loss is {loss_to_log} for model selected on {selection_metric}" + ) + + if cluster.master: + # Replace here + maps_manager._mode_level_to_tsv( + prediction_df, + metrics, + split, + selection_metric, + data_group=data_group, + ) + + def _test_loader_ssda( + self, + maps_manager: MapsManager, + dataloader, + criterion, + alpha, + data_group, + split, + selection_metrics, + use_labels=True, + gpu=None, + network=None, + target=False, + report_ci=True, + ): + """ + Launches the testing task on a dataset wrapped by a DataLoader and writes prediction TSV files. + + Args: + maps_manager (MapsManager): provides the attributes (maps_path, split_name, mode, ...) and helpers (_init_model, write_description_log, _mode_level_to_tsv) used below. + dataloader (torch.utils.data.DataLoader): DataLoader wrapping the test CapsDataset. + criterion (torch.nn.modules.loss._Loss): optimization criterion used during training. + alpha: accepted but never read in this method.
+ data_group (str): name of the data group used for the testing task. + split (int): Index of the split used to train the model tested. + selection_metrics (list[str]): List of metrics used to select the best models which are tested. + use_labels (bool): If True, the labels must exist in test meta-data and metrics are computed. + gpu (bool): If given, a new value for the device of the model will be computed. + network (int): Index of the network tested (only used in multi-network setting). + target (bool): passed through to self.test_da. + report_ci (bool): passed through to self.test_da; when True the logged loss is metrics["Metric_values"][-1], otherwise metrics["loss"]. + + NOTE(review): this method reads dataloader.dataset.caps_dict and dataloader.dataset.df, while the method defined just before this one reads dataloader.dataset.config.data.caps_dict and dataloader.dataset.config.data.data_df; confirm that both attribute paths are valid. + """ + for selection_metric in selection_metrics: + log_dir = ( + maps_manager.maps_path + / f"{maps_manager.split_name}-{split}" + / f"best-{selection_metric}" + / data_group + ) + maps_manager.write_description_log( + log_dir, + data_group, + dataloader.dataset.caps_dict, + dataloader.dataset.df, + ) + + # load the best trained model during the training + model, _ = maps_manager._init_model( + transfer_path=maps_manager.maps_path, + split=split, + transfer_selection=selection_metric, + gpu=gpu, + network=network, + ) + prediction_df, metrics = self.test_da( + network_task=maps_manager.network_task, + model=model, + dataloader=dataloader, + criterion=criterion, + target=target, + report_ci=report_ci, + mode=maps_manager.mode, + metrics_module=maps_manager.metrics_module, + n_classes=maps_manager.n_classes, + ) + if use_labels: + if network is not None: + metrics[f"{maps_manager.mode}_id"] = network + + if report_ci: + loss_to_log = metrics["Metric_values"][-1] + else: + loss_to_log = metrics["loss"] + + logger.info( + f"{maps_manager.mode} level {data_group} loss is {loss_to_log} for model selected on {selection_metric}" + ) + + # Replace here + maps_manager._mode_level_to_tsv( + prediction_df, metrics, split, selection_metric, data_group=data_group + ) + + @torch.no_grad() + def _compute_output_tensors( + self, + maps_manager: MapsManager, + dataset, + data_group, + split, + selection_metrics, + nb_images=None, + gpu=None, + network=None, + ): + """ + Compute the output 
tensors and save them in the MAPS. + + For each selection metric, the input and the model output of every processed element are written with torch.save into {maps_path}/{split_name}-{split}/best-{selection_metric}/{data_group}/tensors. + + Args: + maps_manager (MapsManager): provides the attributes (maps_path, split_name, mode, std_amp, ...) and the _init_model helper used below. + dataset (clinicadl.caps_dataset.data.CapsDataset): wrapper of the data set. + data_group (str): name of the data group used for the task. + split (int): split number. + selection_metrics (list[str]): metrics used for model selection. + nb_images (int): number of full images to write. Default computes the outputs of the whole data set. + gpu (bool): If given, a new value for the device of the model will be computed. + network (int): Index of the network tested (only used in multi-network setting). + + Note: each process handles the indices range(cluster.rank, nb_modes, cluster.world_size); processes for which nb_modes % cluster.world_size <= cluster.rank also handle index 0 (presumably so that every process runs the same number of forward passes -- to be confirmed). + """ + for selection_metric in selection_metrics: + # load the best trained model during the training + model, _ = maps_manager._init_model( + transfer_path=maps_manager.maps_path, + split=split, + transfer_selection=selection_metric, + gpu=gpu, + network=network, + nb_unfrozen_layer=maps_manager.nb_unfrozen_layer, + ) + model = DDP( + model, + fsdp=maps_manager.fully_sharded_data_parallel, + amp=maps_manager.amp, + ) + model.eval() + + tensor_path = ( + maps_manager.maps_path + / f"{maps_manager.split_name}-{split}" + / f"best-{selection_metric}" + / data_group + / "tensors" + ) + if cluster.master: + tensor_path.mkdir(parents=True, exist_ok=True) + dist.barrier() + + if nb_images is None: # Compute outputs for the whole data set + nb_modes = len(dataset) + else: + nb_modes = nb_images * dataset.elem_per_image + + for i in [ + *range(cluster.rank, nb_modes, cluster.world_size), + *range(int(nb_modes % cluster.world_size <= cluster.rank)), + ]: + data = dataset[i] + image = data["image"] + x = image.unsqueeze(0).to(model.device) + with autocast("cuda", enabled=maps_manager.std_amp): + output = model(x) + output = output.squeeze(0).cpu().float() + participant_id = data["participant_id"] + session_id = data["session_id"] + mode_id = data[f"{maps_manager.mode}_id"] + input_filename = f"{participant_id}_{session_id}_{maps_manager.mode}-{mode_id}_input.pt" + output_filename = 
f"{participant_id}_{session_id}_{maps_manager.mode}-{mode_id}_output.pt" + torch.save(image, tensor_path / input_filename) + torch.save(output, tensor_path / output_filename) + logger.debug(f"File saved at {[input_filename, output_filename]}") + + def _ensemble_prediction( + self, + maps_manager: MapsManager, + data_group, + split, + selection_metrics, + use_labels=True, + skip_leak_check=False, + ): + """Computes the results on the image-level. + + If selection_metrics is empty, it is replaced by the result of find_selection_metrics(maps_path, split_name, split). For each metric, _ensemble_to_tsv is called when maps_manager.num_networks > 1, otherwise _mode_to_image_tsv is called when maps_manager.mode is not 'image'; neither call is made when skip_leak_check is True. + """ + + if not selection_metrics: + selection_metrics = find_selection_metrics( + maps_manager.maps_path, maps_manager.split_name, split + ) + + for selection_metric in selection_metrics: + ##################### + # Soft voting + if maps_manager.num_networks > 1 and not skip_leak_check: + maps_manager._ensemble_to_tsv( + split, + selection=selection_metric, + data_group=data_group, + use_labels=use_labels, + ) + elif maps_manager.mode != "image" and not skip_leak_check: + maps_manager._mode_to_image_tsv( + split, + selection=selection_metric, + data_group=data_group, + use_labels=use_labels, + )