added ms weights adaptation

ChristopherKotthoff · ChristopherKotthoff · commit 206dbf9bb28f · 2025-01-28T17:22:42.000Z
diff --git a/detectree2/models/train.py b/detectree2/models/train.py
@@ -13,6 +13,7 @@
 import time
 from pathlib import Path
 from typing import Any, Dict, List, Optional
+from urllib.parse import urlparse
 
 import cv2
 import detectron2.data.transforms as T  # noqa:N812
@@ -373,6 +374,50 @@ def train(self):
             verify_results(self.cfg, self._last_eval_results)
             return self._last_eval_results
 
+    def resume_or_load(self, resume=True):
+        """
+        If `resume==True` and `cfg.OUTPUT_DIR` contains the last checkpoint (defined by
+        a `last_checkpoint` file), resume from the file. Resuming means loading all
+        available states (eg. optimizer and scheduler) and update iteration counter
+        from the checkpoint. ``cfg.MODEL.WEIGHTS`` will not be used.
+
+        Otherwise, this is considered as an independent training. The method will load model
+        weights from the file `cfg.MODEL.WEIGHTS` (but will not load other states) and start
+        from iteration 0.
+
+        Args:
+            resume (bool): whether to do resume or not
+        """
+        self.checkpointer.resume_or_load(self.cfg.MODEL.WEIGHTS, resume=resume)
+        if resume and self.checkpointer.has_checkpoint():
+            # The checkpoint stores the training iteration that just finished, thus we start
+            # at the next iteration
+            self.start_iter = self.iter + 1
+
+        if self.cfg.MODEL.WEIGHTS:
+            checkpoint = torch.tensor(
+                self.checkpointer._load_file(
+                    self.checkpointer.path_manager.get_local_path(
+                        urlparse(self.cfg.MODEL.WEIGHTS)._replace(
+                            query="").geturl()))['model']['backbone.bottom_up.stem.conv1.weight']).to(
+                                self.model.backbone.bottom_up.stem.conv1.weight.device)
+            input_channels_in_checkpoint = checkpoint.shape[1]
+            input_channels_in_model = self.model.backbone.bottom_up.stem.conv1.weight.shape[1]
+            if input_channels_in_checkpoint != input_channels_in_model:
+                logger = logging.getLogger("detectree2")
+                if input_channels_in_checkpoint != 3:
+                    logger.warning(
+                        "Input channel modification only works if checkpoint was trained on RGB images (3 channels). The first three channels will be copied and then repeated in the model."
+                    )
+                logger.warning(
+                    "Mismatch in input channels in checkpoint and model, meaning fvcommon would not have been able to automatically load them. Adjusting weights for 'backbone.bottom_up.stem.conv1.weight' manually."
+                )
+                with torch.no_grad():
+                    self.model.backbone.bottom_up.stem.conv1.weight[:, :
+                                                                    input_channels_in_checkpoint] = checkpoint[:, :
+                                                                                                               input_channels_in_checkpoint]
+                multiply_conv1_weights(self.model)
+
     @classmethod
     def build_evaluator(cls, cfg, dataset_name, output_folder=None):
         """
@@ -964,7 +1009,7 @@ def predictions_on_data(
                 json.dump(evaluations, dest)
 
 
-def modify_conv1_weights(model, num_input_channels):
+def multiply_conv1_weights(model):
     """
     Modify the weights of the first convolutional layer (conv1) to accommodate a different number of input channels.
 
@@ -974,12 +1019,12 @@ def modify_conv1_weights(model, num_input_channels):
 
     Args:
         model (torch.nn.Module): The model containing the convolutional layer to modify.
-        num_input_channels (int): The number of input channels for the new conv1 layer.
 
     """
     with torch.no_grad():
         # Retrieve the original weights of the conv1 layer
         old_weights = model.backbone.bottom_up.stem.conv1.weight
+        num_input_channels = model.backbone.bottom_up.stem.conv1.weight.shape[1]  # The number of input channels
 
         # Create a new weight tensor with the desired number of input channels
         # The shape is (out_channels, in_channels, height, width)
diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
@@ -399,21 +399,21 @@ The number of bands can be checked with rasterio:
    print(f'The raster has {num_bands} bands.')
 
 
-Due to the additional bands, we must modify the weights of the first convolutional layer (conv1) to accommodate a
-different number of input channels. This is done with the ``modify_conv1_weights`` function. The extension of the 
-``cfg.MODEL.PIXEL_MEAN`` and ``cfg.MODEL.PIXEL_STD`` lists to include the additional bands happens within the 
-``setup_cfg`` function when ``num_bands`` is set to a value greater than 3. ``imgmode`` should be set to ``"ms"`` to
-ensure the correct training routines are called.
+Due to the additional bands, the weights of the first convolutional layer (conv1) are modified to accommodate a
+different number of input channels. This is automatically done in the case of ``imgmode`` being set to ``"ms"``.
+The first three input weights are multiplied across the new bands. The extension of the ``cfg.MODEL.PIXEL_MEAN``
+and ``cfg.MODEL.PIXEL_STD`` lists to include the additional bands happens within the ``setup_cfg`` function when
+``num_bands`` is set to a value greater than 3. ``imgmode`` should be set to ``"ms"`` to ensure the correct training
+routines are called.
 
 .. code-block:: python
 
    from datetime import date
-   from detectron2.modeling import build_model
    import torch.nn as nn
    import torch.nn.init as init
    from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers
    import numpy as np
-   from detectree2.models.train import modify_conv1_weights, MyTrainer, setup_cfg
+   from detectree2.models.train import MyTrainer, setup_cfg
 
    # Good idea to keep track of the date if producing multiple models
    today = date.today()
@@ -433,12 +433,6 @@ ensure the correct training routines are called.
                   max_iter=500000, out_dir=out_dir, resize = "rand_fixed", imgmode="ms",
                   num_bands= num_bands) # update_model arg can be used to load in trained  model
 
-   # Build the model
-   model = build_model(cfg)
-
-   # Adjust input layer to accept correct number of channels
-   modify_conv1_weights(model, num_input_channels=num_bands)
-
 
 With additional bands, more data is being passed through the network per image so it may be neessary to reduce the 
 number of images per batch. Only do this is you a getting warnings/errors about memory usage (e.g.