Fixed validation for 1D signals and added customizable Dice calculation

Lukas Arts · Lukas Arts · commit 40bbe29d1ae1 · 2024-10-01T18:14:26.000+02:00
diff --git a/nnUNet_results b/nnUNet_results
@@ -0,0 +1 @@
+/media/lukas/f476eab7-6c09-4db6-bfcd-2922b3c3502b/UU/ASRA/Segmentation/nnUNet_results
diff --git a/nnunetv2/evaluation/evaluate_predictions.py b/nnunetv2/evaluation/evaluate_predictions.py
@@ -93,6 +93,8 @@ def compute_metrics(reference_file: str, prediction_file: str, image_reader_writ
     seg_ref, seg_ref_dict = image_reader_writer.read_seg(reference_file)
     seg_pred, seg_pred_dict = image_reader_writer.read_seg(prediction_file)
 
+    print(reference_file, prediction_file)
+
     ignore_mask = seg_ref == ignore_label if ignore_label is not None else None
 
     results = {}
diff --git a/nnunetv2/experiment_planning/experiment_planners/residual_unets/residual_encoder_unet_planners.py b/nnunetv2/experiment_planning/experiment_planners/residual_unets/residual_encoder_unet_planners.py
@@ -27,6 +27,7 @@ def __init__(self, dataset_name_or_id: Union[str, int],
         # much as possible
         self.UNet_reference_val_3d = 680000000
         self.UNet_reference_val_2d = 135000000
+        self.UNet_reference_val_1d = 135000000
         self.UNet_blocks_per_stage_encoder = (1, 3, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6)
         self.UNet_blocks_per_stage_decoder = (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
 
@@ -36,7 +37,7 @@ def generate_data_identifier(self, configuration_name: str) -> str:
         same name. In order to distinguish the associated data we need a data identifier that reflects not just the
         config but also the plans it originates from
         """
-        if configuration_name == '2d' or configuration_name == '3d_fullres':
+        if configuration_name == '1d' or configuration_name == '2d' or configuration_name == '3d_fullres':
             # we do not deviate from ExperimentPlanner so we can reuse its data
             return 'nnUNetPlans' + '_' + configuration_name
         else:
@@ -76,6 +77,9 @@ def _keygen(patch_size, strides):
             initial_patch_size = [round(i) for i in tmp * (256 ** 3 / np.prod(tmp)) ** (1 / 3)]
         elif len(spacing) == 2:
             initial_patch_size = [round(i) for i in tmp * (2048 ** 2 / np.prod(tmp)) ** (1 / 2)]
+        elif len(spacing) == 1:
+            #initial patch size for 1d signals is the entire signal
+            initial_patch_size = [round(median_shape[0])]
         else:
             raise RuntimeError()
 
@@ -129,7 +133,7 @@ def _keygen(patch_size, strides):
 
         # how large is the reference for us here (batch size etc)?
         # adapt for our vram target
-        reference = (self.UNet_reference_val_2d if len(spacing) == 2 else self.UNet_reference_val_3d) * \
+        reference = (self.UNet_reference_val_1d if len(spacing) == 1 else (self.UNet_reference_val_2d if len(spacing) == 2 else self.UNet_reference_val_3d)) * \
                     (self.UNet_vram_target_GB / self.UNet_reference_val_corresp_GB)
 
         while estimate > reference:
@@ -183,7 +187,7 @@ def _keygen(patch_size, strides):
 
         # alright now let's determine the batch size. This will give self.UNet_min_batch_size if the while loop was
         # executed. If not, additional vram headroom is used to increase batch size
-        ref_bs = self.UNet_reference_val_corresp_bs_2d if len(spacing) == 2 else self.UNet_reference_val_corresp_bs_3d
+        ref_bs = self.UNet_reference_val_corresp_bs_1d if len(spacing) == 1 else (self.UNet_reference_val_corresp_bs_2d if len(spacing) == 2 else self.UNet_reference_val_corresp_bs_3d)
         batch_size = round((reference / estimate) * ref_bs)
 
         # we need to cap the batch size to cover at most 5% of the entire dataset. Overfitting precaution. We cannot
@@ -241,6 +245,7 @@ def __init__(self, dataset_name_or_id: Union[str, int],
         # this is supposed to give the same GPU memory requirement as the default nnU-Net
         self.UNet_reference_val_3d = 680000000
         self.UNet_reference_val_2d = 135000000
+        self.UNet_reference_val_1d = 135000000
         self.max_dataset_covered = 1
 
 
diff --git a/nnunetv2/imageio/numpy_reader_writer.py b/nnunetv2/imageio/numpy_reader_writer.py
@@ -27,14 +27,17 @@ class NumpyIO(BaseReaderWriter):
         '.npy'
     ]
 
-    def read_images(self, image_fnames: Union[List[str], Tuple[str, ...]]) -> Tuple[np.ndarray, dict]:
+    def read_images(self, image_fnames: Union[List[str], Tuple[str, ...]], annotations=False) -> Tuple[np.ndarray, dict]:
         images = []
         for f in image_fnames:
             npy_img = np.load(f)
             assert npy_img.ndim == 1 or npy_img.ndim == 2, "Only 1D timeseries with one or more channels supported"
             if npy_img.ndim == 2:
                 # channel to front, add additional dim so that we have shape (c, 1, 1, X)
-                images.append(npy_img.transpose((1, 0))[:, None, None])
+                if annotations:
+                    images.append(npy_img[None, None, :])
+                else:
+                    images.append(npy_img.transpose((1, 0))[:, None, None])
             elif npy_img.ndim == 1:
                 # grayscale image
                 images.append(npy_img[None, None, None])
@@ -49,7 +52,7 @@ def read_images(self, image_fnames: Union[List[str], Tuple[str, ...]]) -> Tuple[
         return np.vstack(images, dtype=np.float32, casting='unsafe'), {'spacing': (999, 999, 1)}
 
     def read_seg(self, seg_fname: str) -> Tuple[np.ndarray, dict]:
-        return self.read_images((seg_fname, ))
+        return self.read_images((seg_fname, ), annotations=True)
 
     def write_seg(self, seg: np.ndarray, output_fname: str, properties: dict) -> None:
         np.save(output_fname, seg[0].astype(np.uint8, copy=False))
diff --git a/nnunetv2/inference/predict_from_raw_data.py b/nnunetv2/inference/predict_from_raw_data.py
@@ -502,7 +502,23 @@ def predict_logits_from_preprocessed_data(self, data: torch.Tensor) -> torch.Ten
 
     def _internal_get_sliding_window_slicers(self, image_size: Tuple[int, ...]):
         slicers = []
-        if len(self.configuration_manager.patch_size) < len(image_size):
+        dim = len(self.configuration_manager.patch_size)
+
+        if dim == 1:
+            steps = compute_steps_for_sliding_window(image_size[2:], self.configuration_manager.patch_size,
+                                                     self.tile_step_size)
+
+            if self.verbose: print(f'n_steps {image_size[0] * len(steps[0]) * len(steps[1])}, image size is'
+                                   f' {image_size}, tile_size {self.configuration_manager.patch_size}, '
+                                   f'tile_step_size {self.tile_step_size}\nsteps:\n{steps}')
+
+            for d in range(image_size[0]):
+                for sx in steps[0]:
+                    slicers.append(
+                        tuple([slice(None), d, 0, slice(sx, sx + self.configuration_manager.patch_size[0])]))
+
+        elif dim == 2:
+        #if len(self.configuration_manager.patch_size) < len(image_size):
             assert len(self.configuration_manager.patch_size) == len(
                 image_size) - 1, 'if tile_size has less entries than image_size, ' \
                                  'len(tile_size) ' \
@@ -520,7 +536,7 @@ def _internal_get_sliding_window_slicers(self, image_size: Tuple[int, ...]):
                         slicers.append(
                             tuple([slice(None), d, *[slice(si, si + ti) for si, ti in
                                                      zip((sx, sy), self.configuration_manager.patch_size)]]))
-        else:
+        elif dim == 3:
             steps = compute_steps_for_sliding_window(image_size, self.configuration_manager.patch_size,
                                                      self.tile_step_size)
             if self.verbose: print(
@@ -532,6 +548,10 @@ def _internal_get_sliding_window_slicers(self, image_size: Tuple[int, ...]):
                         slicers.append(
                             tuple([slice(None), *[slice(si, si + ti) for si, ti in
                                                   zip((sx, sy, sz), self.configuration_manager.patch_size)]]))
+
+        else:
+            raise NotImplementedError('This function only supports 1D, 2D and 3D images')
+
         return slicers
 
     def _internal_maybe_mirror_and_predict(self, x: torch.Tensor) -> torch.Tensor:
diff --git a/nnunetv2/paths.py b/nnunetv2/paths.py
@@ -18,9 +18,9 @@
 PLEASE READ paths.md FOR INFORMATION TO HOW TO SET THIS UP
 """
 
-nnUNet_raw = "/Users/lukasarts/Dropbox/UU/ASRA/nnUNet/nnUNet_raw"
-nnUNet_preprocessed = "/Users/lukasarts/Dropbox/UU/ASRA/nnUNet/nnUNet_preprocessed"
-nnUNet_results = "/Users/lukasarts/Dropbox/UU/ASRA/nnUNet/nnUNet_results"
+nnUNet_raw = "/home/lukas/UU/ASRA/Analysis/nnUNet/nnUNet_raw"
+nnUNet_preprocessed = "/home/lukas/UU/ASRA/Analysis/nnUNet/nnUNet_preprocessed"
+nnUNet_results = "/home/lukas/UU/ASRA/Analysis/nnUNet/nnUNet_results"
 
 if nnUNet_raw is None:
     print("nnUNet_raw is not defined and nnU-Net can only be used on data for which preprocessed files "
diff --git a/nnunetv2/run/run_training.py b/nnunetv2/run/run_training.py
@@ -284,4 +284,4 @@ def run_training_entry():
     # multiprocessing.set_start_method("spawn")
     #run_training_entry()
 
-    run_training('11', '1d', 0, 'nnUNetTrainer', 'nnUNetPlans', None, 1, False, False, False, False, False, False, device=torch.device('cpu'))
+    run_training('12', '1d', 1, 'nnUNetTrainer', 'nnUNetPlans', None, 1, False, False, False, False, False, False, device=torch.device('cuda'))
diff --git a/nnunetv2/training/nnUNetTrainer/nnUNetTrainer.py b/nnunetv2/training/nnUNetTrainer/nnUNetTrainer.py
@@ -147,10 +147,10 @@ def __init__(self, plans: dict, configuration: str, fold: int, dataset_json: dic
         ### Some hyperparameters for you to fiddle with
         self.initial_lr = 1e-2
         self.weight_decay = 3e-5
-        self.oversample_foreground_percent = 0.33
-        self.num_iterations_per_epoch = 250
-        self.num_val_iterations_per_epoch = 50
-        self.num_epochs = 1000
+        self.oversample_foreground_percent = 0
+        self.num_iterations_per_epoch = 100
+        self.num_val_iterations_per_epoch = 20
+        self.num_epochs = 64
         self.current_epoch = 0
         self.enable_deep_supervision = True
 
@@ -203,6 +203,9 @@ def __init__(self, plans: dict, configuration: str, fold: int, dataset_json: dic
                                "#######################################################################\n",
                                also_print_to_console=True, add_timestamp=False)
 
+    def count_trainable_params(self, model) -> int:
+        """
+        Return the total number of elements over all parameters of ``model`` that have ``requires_grad`` set.
+
+        :param model: object exposing ``parameters()`` (each parameter provides ``numel()`` and ``requires_grad``)
+        :return: summed element count of the trainable parameters
+        """
+        trainable = (param for param in model.parameters() if param.requires_grad)
+        return sum(param.numel() for param in trainable)
+
     def initialize(self):
         if not self.was_initialized:
             self.num_input_channels = determine_num_input_channels(self.plans_manager, self.configuration_manager,
@@ -216,10 +219,14 @@ def initialize(self):
                 self.label_manager.num_segmentation_heads,
                 self.enable_deep_supervision
             ).to(self.device)
+
+            print("NUM PARAMS:", self.count_trainable_params(self.network))
+
             # compile network for free speedup
-            if self._do_i_compile():
-                self.print_to_log_file('Using torch.compile...')
-                self.network = torch.compile(self.network)
+            # gives errors when compiling network
+            # if self._do_i_compile():
+            #     self.print_to_log_file('Using torch.compile...')
+            #     self.network = torch.compile(self.network)
 
             self.optimizer, self.lr_scheduler = self.configure_optimizers()
             # if ddp, wrap in DDP wrapper
@@ -959,8 +966,8 @@ def on_train_start(self):
 
         self._save_debug_information()
 
-        # print(f"batch size: {self.batch_size}")
-        # print(f"oversample: {self.oversample_foreground_percent}")
+        print(f"batch size: {self.batch_size}")
+        print(f"oversample: {self.oversample_foreground_percent}")
 
     def on_train_end(self):
         # dirty hack because on_epoch_end increments the epoch counter and this is executed afterwards.
@@ -1028,6 +1035,7 @@ def train_step(self, batch: dict) -> dict:
             l.backward()
             torch.nn.utils.clip_grad_norm_(self.network.parameters(), 12)
             self.optimizer.step()
+        
         return {'loss': l.detach().cpu().numpy()}
 
     def on_train_epoch_end(self, train_outputs: List[dict]):
@@ -1059,6 +1067,7 @@ def validation_step(self, batch: dict) -> dict:
         # If the device_type is 'cpu' then it's slow as heck and needs to be disabled.
         # If the device_type is 'mps' then it will complain that mps is not implemented, even if enabled=False is set. Whyyyyyyy. (this is why we don't make use of enabled=False)
         # So autocast will only be active if we have a cuda device.
+
         with autocast(self.device.type, enabled=True) if self.device.type == 'cuda' else dummy_context():
             output = self.network(data)
             del data
@@ -1101,14 +1110,17 @@ def validation_step(self, batch: dict) -> dict:
         tp_hard = tp.detach().cpu().numpy()
         fp_hard = fp.detach().cpu().numpy()
         fn_hard = fn.detach().cpu().numpy()
-        if not self.label_manager.has_regions:
-            # if we train with regions all segmentation heads predict some kind of foreground. In conventional
-            # (softmax training) there needs tobe one output for the background. We are not interested in the
-            # background Dice
-            # [1:] in order to remove background
-            tp_hard = tp_hard[1:]
-            fp_hard = fp_hard[1:]
-            fn_hard = fn_hard[1:]
+
+        # we now handle the removal of the background dice in the labelmanager
+
+        # if not self.label_manager.has_regions:
+        #     # if we train with regions all segmentation heads predict some kind of foreground. In conventional
+        #     # (softmax training) there needs tobe one output for the background. We are not interested in the
+        #     # background Dice
+        #     # [1:] in order to remove background
+        #     tp_hard = tp_hard[1:]
+        #     fp_hard = fp_hard[1:]
+        #     fn_hard = fn_hard[1:]
 
         return {'loss': l.detach().cpu().numpy(), 'tp_hard': tp_hard, 'fp_hard': fp_hard, 'fn_hard': fn_hard}
 
@@ -1118,6 +1130,10 @@ def on_validation_epoch_end(self, val_outputs: List[dict]):
         fp = np.sum(outputs_collated['fp_hard'], 0)
         fn = np.sum(outputs_collated['fn_hard'], 0)
 
+        tp = tp[self.label_manager._get_indices_to_calc_dice()]
+        fp = fp[self.label_manager._get_indices_to_calc_dice()]
+        fn = fn[self.label_manager._get_indices_to_calc_dice()]
+
         if self.is_ddp:
             world_size = dist.get_world_size()
 
diff --git a/nnunetv2/utilities/generate_dataset.py b/nnunetv2/utilities/generate_dataset.py
@@ -33,7 +33,7 @@ def simulate_ecg(length, fs):
 
 def generate_dataset(n_files=10, length=2048, fs=200, folder=""):
     dataset = {}
-    basedir = '/Users/lukasarts/Dropbox/UU/ASRA/nnUNet/nnUNet_raw/'
+    basedir = 'nnUNet_raw/'
     if folder != "" and not os.path.exists(os.path.join(basedir, folder)):
         os.makedirs(os.path.join(basedir, folder))
 
@@ -52,7 +52,7 @@ def generate_dataset(n_files=10, length=2048, fs=200, folder=""):
     n_files = 25
     length = 10
     fs = 200
-    folder = 'Dataset0011_test'
+    folder = 'Dataset011_test'
     generate_dataset(n_files, length, fs, folder=folder)
 
 
diff --git a/nnunetv2/utilities/label_handling/label_handling.py b/nnunetv2/utilities/label_handling/label_handling.py
@@ -19,10 +19,11 @@
 
 
 class LabelManager(object):
-    def __init__(self, label_dict: dict, regions_class_order: Union[List[int], None], force_use_labels: bool = False,
+    def __init__(self, label_dict: dict, regions_class_order: Union[List[int], None], use_for_validation: Union[dict, None], force_use_labels: bool = False,
                  inference_nonlin=None):
         self._sanity_check(label_dict)
         self.label_dict = label_dict
+        self.use_for_validation = use_for_validation
         self.regions_class_order = regions_class_order
         self._force_use_labels = force_use_labels
 
@@ -74,6 +75,13 @@ def _get_all_labels(self) -> List[int]:
         all_labels.sort()
         return all_labels
 
+    def _get_indices_to_calc_dice(self) -> List[int]:
+        """
+        Return the indices of the per-class tp/fp/fn entries that should contribute to the validation Dice.
+
+        ``self.use_for_validation`` maps label names (keys of ``self.label_dict``) to a flag. For every flagged
+        label the value stored in ``self.label_dict`` is returned, in the iteration order of
+        ``use_for_validation``.
+
+        If no selection was configured (``use_for_validation`` is None) every segmentation head is used, except
+        that index 0 (background) is skipped when training without regions. Previously this case raised an
+        AttributeError on ``None.items()``.
+
+        NOTE(review): assumes the selected ``label_dict`` values are plain ints that are valid array indices,
+        i.e. that region-based label definitions are not combined with ``use_for_validation`` - confirm.
+
+        :return: list of indices into the class axis of the collated tp/fp/fn arrays
+        """
+        if self.use_for_validation is None:
+            # without regions head 0 is the background output, which is not part of the reported Dice
+            first_head = 0 if self.has_regions else 1
+            return list(range(first_head, self.num_segmentation_heads))
+        return [self.label_dict[name] for name, use in self.use_for_validation.items() if use]
+
     def _get_regions(self) -> Union[None, List[Union[int, Tuple[int, ...]]]]:
         if not self._has_regions or self._force_use_labels:
             return None
diff --git a/nnunetv2/utilities/pkl_to_dataset.py b/nnunetv2/utilities/pkl_to_dataset.py
@@ -0,0 +1,81 @@
+import pickle
+import numpy as np
+import argparse
+import glob
+import os
+import json
+
+from nnunetv2.paths import nnUNet_preprocessed, nnUNet_raw, nnUNet_results
+
+def from_one_hot_to_indices(segmentation):
+    """
+    Collapse a channel-first one-hot mask into a single integer label map.
+
+    Channels 0, 1, 2 and 4 are written as labels 1, 2, 3 and 4, in that order, so where several of these
+    channels equal 1 at the same position the later channel wins. Positions where none of them equals 1
+    keep the value 0.
+
+    :param segmentation: array indexed as [channel, ...]; channel index 4 must exist
+    :return: array of shape ``segmentation.shape[1:]`` in the default dtype of ``np.zeros``
+    """
+    # NOTE(review): channel 3 is never read - presumably it encodes background; confirm against the pkl format.
+    channel_to_label = ((0, 1), (1, 2), (2, 3), (4, 4))
+    label_map = np.zeros(segmentation.shape[1:])
+    for channel, label in channel_to_label:
+        label_map[segmentation[channel] == 1] = label
+    return label_map
+
+def pkl_to_dataset(pkl_paths, folder=""):
+    """
+    Convert one or more pickled record collections into an nnU-Net style raw dataset folder.
+
+    The content of every pickle is appended to one list of records; each record is indexed with the keys
+    "signal", "segmentation", "is_labeled", "db" and "record". Records that are labeled and whose db is not
+    "STANFORD" are written as ``imagesTr/case_<record>_0000.npy`` (signal) and ``labelsTr/case_<record>.npy``
+    (segmentation converted with ``from_one_hot_to_indices``). A ``dataset.json`` describing channel, labels
+    and the number of written cases is stored in the dataset folder.
+
+    :param pkl_paths: path or list of paths to the pickle files
+    :param folder: name of the dataset folder to create. If empty, ``Dataset<id>_<pkl name>[_<pkl name>...]``
+        is generated, where <id> is one larger than the highest numeric dataset id found in ``nnUNet_raw``
+        (1 if there is none yet).
+    """
+    if not isinstance(pkl_paths, list):
+        pkl_paths = [pkl_paths]
+
+    data = []
+    for pkl_path in pkl_paths:
+        # SECURITY: pickle.load can execute arbitrary code stored in the file - only use trusted pickles.
+        with open(pkl_path, 'rb') as f:
+            data += pickle.load(f)
+
+    if folder == "":
+        ids = []
+        for dataset_dir in glob.glob(os.path.join(nnUNet_raw, 'Dataset*')):
+            try:
+                ids.append(int(os.path.basename(dataset_dir).split('Dataset')[1].split('_')[0]))
+            except ValueError:
+                # not of the form Dataset<number>_<name>, so it cannot clash with a generated id
+                continue
+        # default=0 so that the very first dataset gets id 1 instead of failing on an empty list
+        dataset_id = max(ids, default=0) + 1
+        folder = "Dataset%03d" % dataset_id
+        for pkl_path in pkl_paths:
+            folder += "_" + os.path.basename(pkl_path).split('.')[0]
+
+    # NOTE(review): the output goes to 'nnUNet_raw/' relative to the current working directory, while the id
+    # lookup above uses the configured nnUNet_raw path - confirm that this is intended.
+    basedir = 'nnUNet_raw/'
+    images_dir = os.path.join(basedir, folder, 'imagesTr')
+    labels_dir = os.path.join(basedir, folder, 'labelsTr')
+    # makedirs also creates the dataset folder itself
+    os.makedirs(images_dir, exist_ok=True)
+    os.makedirs(labels_dir, exist_ok=True)
+
+    nfiles = 0
+    for record in data:
+        if not record["is_labeled"] or record["db"] == "STANFORD":
+            continue
+        # convert only the records that are actually written
+        segmentation = from_one_hot_to_indices(record["segmentation"])
+        np.save(os.path.join(images_dir, f'case_{record["record"]}_0000.npy'), record["signal"])
+        np.save(os.path.join(labels_dir, f'case_{record["record"]}.npy'), segmentation)
+        nfiles += 1
+
+    jsn = {
+            "channel_names": {
+                "0": "LeadII"
+            },
+            "labels": {
+                "background": 0,
+                "p_wave": 1,
+                "qrs_wave": 2,
+                "t_wave": 3,
+                "noise": 4
+            },
+            "numTraining": nfiles,
+            "file_ending": ".npy"
+        }
+
+    with open(os.path.join(basedir, folder, 'dataset.json'), 'w') as f:
+        json.dump(jsn, f)
+
+
+if __name__ == "__main__":
+    # Command line entry point: collect the pickle path(s) and convert them into a dataset folder.
+    cli = argparse.ArgumentParser(description='Convert a pkl file to a dataset')
+    cli.add_argument('-ps','--pkl_paths', nargs='+', help='Path(s) to the pkl file', required=True)
+    pkl_to_dataset(cli.parse_args().pkl_paths)
diff --git a/nnunetv2/utilities/plans_handling/plans_handler.py b/nnunetv2/utilities/plans_handling/plans_handler.py
diff --git a/train_all_folds.sh b/train_all_folds.sh

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+/media/lukas/f476eab7-6c09-4db6-bfcd-2922b3c3502b/UU/ASRA/Segmentation/nnUNet_results`