Merge remote-tracking branch 'upstream/dev' into 4922-step-1

wyli · wyli · commit 1d9800f8862e · 2022-10-29T08:28:20.000+01:00
Signed-off-by: Wenqi Li <wenqil@nvidia.com>
diff --git a/monai/losses/dice.py b/monai/losses/dice.py
@@ -60,12 +60,12 @@ def __init__(
             include_background: if False, channel index 0 (background category) is excluded from the calculation.
                 if the non-background segmentations are small compared to the total image size they can get overwhelmed
                 by the signal from the background so excluding it in such cases helps convergence.
-            to_onehot_y: whether to convert `y` into the one-hot format. Defaults to False.
+            to_onehot_y: whether to convert the ``target`` into the one-hot format,
+                using the number of classes inferred from `input` (``input.shape[1]``). Defaults to False.
             sigmoid: if True, apply a sigmoid function to the prediction.
             softmax: if True, apply a softmax function to the prediction.
-            other_act: if don't want to use `sigmoid` or `softmax`, use other callable function to execute
-                other activation layers, Defaults to ``None``. for example:
-                `other_act = torch.tanh`.
+            other_act: callable function to execute other activation layers, Defaults to ``None``. for example:
+                ``other_act = torch.tanh``.
             squared_pred: use squared versions of targets and predictions in the denominator or not.
             jaccard: compute Jaccard Index (soft IoU) instead of dice or not.
             reduction: {``"none"``, ``"mean"``, ``"sum"``}
@@ -247,12 +247,12 @@ def __init__(
         """
         Args:
             include_background: If False channel index 0 (background category) is excluded from the calculation.
-            to_onehot_y: whether to convert `y` into the one-hot format. Defaults to False.
+            to_onehot_y: whether to convert the ``target`` into the one-hot format,
+                using the number of classes inferred from `input` (``input.shape[1]``). Defaults to False.
             sigmoid: If True, apply a sigmoid function to the prediction.
             softmax: If True, apply a softmax function to the prediction.
-            other_act: if don't want to use `sigmoid` or `softmax`, use other callable function to execute
-                other activation layers, Defaults to ``None``. for example:
-                `other_act = torch.tanh`.
+            other_act: callable function to execute other activation layers, Defaults to ``None``. for example:
+                ``other_act = torch.tanh``.
             w_type: {``"square"``, ``"simple"``, ``"uniform"``}
                 Type of function to transform ground truth volume to a weight factor. Defaults to ``"square"``.
             reduction: {``"none"``, ``"mean"``, ``"sum"``}
@@ -639,14 +639,14 @@ def __init__(
             ``reduction`` is used for both losses and other parameters are only used for dice loss.
 
             include_background: if False channel index 0 (background category) is excluded from the calculation.
-            to_onehot_y: whether to convert `y` into the one-hot format. Defaults to False.
+            to_onehot_y: whether to convert the ``target`` into the one-hot format,
+                using the number of classes inferred from `input` (``input.shape[1]``). Defaults to False.
             sigmoid: if True, apply a sigmoid function to the prediction, only used by the `DiceLoss`,
                 don't need to specify activation function for `CrossEntropyLoss`.
             softmax: if True, apply a softmax function to the prediction, only used by the `DiceLoss`,
                 don't need to specify activation function for `CrossEntropyLoss`.
-            other_act: if don't want to use `sigmoid` or `softmax`, use other callable function to execute
-                other activation layers, Defaults to ``None``. for example: `other_act = torch.tanh`.
-                only used by the `DiceLoss`, don't need to specify activation function for `CrossEntropyLoss`.
+            other_act: callable function to execute other activation layers, Defaults to ``None``. for example:
+                ``other_act = torch.tanh``. only used by the `DiceLoss`, not for the `CrossEntropyLoss`.
             squared_pred: use squared versions of targets and predictions in the denominator or not.
             jaccard: compute Jaccard Index (soft IoU) instead of dice or not.
             reduction: {``"mean"``, ``"sum"``}
@@ -728,7 +728,10 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
 
         """
         if len(input.shape) != len(target.shape):
-            raise ValueError("the number of dimensions for input and target should be the same.")
+            raise ValueError(
+                "the number of dimensions for input and target should be the same, "
+                f"got shape {input.shape} and {target.shape}."
+            )
 
         dice_loss = self.dice(input, target)
         ce_loss = self.ce(input, target)
@@ -743,6 +746,10 @@ class DiceFocalLoss(_Loss):
     The details of Dice loss is shown in ``monai.losses.DiceLoss``.
     The details of Focal Loss is shown in ``monai.losses.FocalLoss``.
 
+    ``gamma``, ``focal_weight`` and ``lambda_focal`` are only used for the focal loss.
+    ``include_background`` and ``reduction`` are used for both losses
+    and other parameters are only used for dice loss.
+
     """
 
     def __init__(
@@ -765,18 +772,15 @@ def __init__(
     ) -> None:
         """
         Args:
-            ``gamma``, ``focal_weight`` and ``lambda_focal`` are only used for focal loss.
-            ``include_background``, ``to_onehot_y``and ``reduction`` are used for both losses
-            and other parameters are only used for dice loss.
             include_background: if False channel index 0 (background category) is excluded from the calculation.
-            to_onehot_y: whether to convert `y` into the one-hot format. Defaults to False.
+            to_onehot_y: whether to convert the ``target`` into the one-hot format,
+                using the number of classes inferred from `input` (``input.shape[1]``). Defaults to False.
             sigmoid: if True, apply a sigmoid function to the prediction, only used by the `DiceLoss`,
                 don't need to specify activation function for `FocalLoss`.
             softmax: if True, apply a softmax function to the prediction, only used by the `DiceLoss`,
                 don't need to specify activation function for `FocalLoss`.
-            other_act: if don't want to use `sigmoid` or `softmax`, use other callable function to execute
-                other activation layers, Defaults to ``None``. for example: `other_act = torch.tanh`.
-                only used by the `DiceLoss`, don't need to specify activation function for `FocalLoss`.
+            other_act: callable function to execute other activation layers, Defaults to ``None``.
+                for example: `other_act = torch.tanh`. only used by the `DiceLoss`, not for `FocalLoss`.
             squared_pred: use squared versions of targets and predictions in the denominator or not.
             jaccard: compute Jaccard Index (soft IoU) instead of dice or not.
             reduction: {``"none"``, ``"mean"``, ``"sum"``}
@@ -803,6 +807,8 @@ def __init__(
         """
         super().__init__()
         self.dice = DiceLoss(
+            include_background=include_background,
+            to_onehot_y=False,
             sigmoid=sigmoid,
             softmax=softmax,
             other_act=other_act,
@@ -813,15 +819,20 @@ def __init__(
             smooth_dr=smooth_dr,
             batch=batch,
         )
-        self.focal = FocalLoss(gamma=gamma, weight=focal_weight, reduction=reduction)
+        self.focal = FocalLoss(
+            include_background=include_background,
+            to_onehot_y=False,
+            gamma=gamma,
+            weight=focal_weight,
+            reduction=reduction,
+        )
         if lambda_dice < 0.0:
             raise ValueError("lambda_dice should be no less than 0.0.")
         if lambda_focal < 0.0:
             raise ValueError("lambda_focal should be no less than 0.0.")
         self.lambda_dice = lambda_dice
         self.lambda_focal = lambda_focal
         self.to_onehot_y = to_onehot_y
-        self.include_background = include_background
 
     def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
         """
@@ -836,24 +847,16 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
 
         """
         if len(input.shape) != len(target.shape):
-            raise ValueError("the number of dimensions for input and target should be the same.")
-
-        n_pred_ch = input.shape[1]
-
+            raise ValueError(
+                "the number of dimensions for input and target should be the same, "
+                f"got shape {input.shape} and {target.shape}."
+            )
         if self.to_onehot_y:
+            n_pred_ch = input.shape[1]
             if n_pred_ch == 1:
                 warnings.warn("single channel prediction, `to_onehot_y=True` ignored.")
             else:
                 target = one_hot(target, num_classes=n_pred_ch)
-
-        if not self.include_background:
-            if n_pred_ch == 1:
-                warnings.warn("single channel prediction, `include_background=False` ignored.")
-            else:
-                # if skipping background, removing first channel
-                target = target[:, 1:]
-                input = input[:, 1:]
-
         dice_loss = self.dice(input, target)
         focal_loss = self.focal(input, target)
         total_loss: torch.Tensor = self.lambda_dice * dice_loss + self.lambda_focal * focal_loss
@@ -867,11 +870,13 @@ class GeneralizedDiceFocalLoss(torch.nn.modules.loss._Loss):
     Args:
         include_background (bool, optional): if False channel index 0 (background category) is excluded from the calculation.
             Defaults to True.
-        to_onehot_y (bool, optional): whether to convert `y` into the one-hot format. Defaults to False.
+        to_onehot_y: whether to convert the ``target`` into the one-hot format,
+            using the number of classes inferred from `input` (``input.shape[1]``). Defaults to False.
         sigmoid (bool, optional): if True, apply a sigmoid function to the prediction. Defaults to False.
         softmax (bool, optional): if True, apply a softmax function to the prediction. Defaults to False.
-        other_act (Optional[Callable], optional): if don't want to use sigmoid or softmax, use other callable
-            function to execute other activation layers. Defaults to None.
+        other_act (Optional[Callable], optional): callable function to execute other activation layers,
+            Defaults to ``None``. for example: `other_act = torch.tanh`.
+            only used by the `GeneralizedDiceLoss`, not for the `FocalLoss`.
         w_type (Union[Weight, str], optional): {``"square"``, ``"simple"``, ``"uniform"``}. Type of function to transform
             ground-truth volume to a weight factor. Defaults to ``"square"``.
         reduction (Union[LossReduction, str], optional): {``"none"``, ``"mean"``, ``"sum"``}. Specified the reduction to
diff --git a/monai/networks/nets/attentionunet.py b/monai/networks/nets/attentionunet.py
@@ -143,12 +143,27 @@ def forward(self, g: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
 
 
 class AttentionLayer(nn.Module):
-    def __init__(self, spatial_dims: int, in_channels: int, out_channels: int, submodule: nn.Module, dropout=0.0):
+    def __init__(
+        self,
+        spatial_dims: int,
+        in_channels: int,
+        out_channels: int,
+        submodule: nn.Module,
+        up_kernel_size=3,
+        strides=2,
+        dropout=0.0,
+    ):
         super().__init__()
         self.attention = AttentionBlock(
             spatial_dims=spatial_dims, f_g=in_channels, f_l=in_channels, f_int=in_channels // 2
         )
-        self.upconv = UpConv(spatial_dims=spatial_dims, in_channels=out_channels, out_channels=in_channels, strides=2)
+        self.upconv = UpConv(
+            spatial_dims=spatial_dims,
+            in_channels=out_channels,
+            out_channels=in_channels,
+            strides=strides,
+            kernel_size=up_kernel_size,
+        )
         self.merge = Convolution(
             spatial_dims=spatial_dims, in_channels=2 * in_channels, out_channels=in_channels, dropout=dropout
         )
@@ -174,7 +189,7 @@ class AttentionUnet(nn.Module):
         channels (Sequence[int]): sequence of channels. Top block first. The length of `channels` should be no less than 2.
         strides (Sequence[int]): stride to use for convolutions.
         kernel_size: convolution kernel size.
-        upsample_kernel_size: convolution kernel size for transposed convolution layers.
+        up_kernel_size: convolution kernel size for transposed convolution layers.
         dropout: dropout ratio. Defaults to no dropout.
     """
 
@@ -210,9 +225,9 @@ def __init__(
         )
         self.up_kernel_size = up_kernel_size
 
-        def _create_block(channels: Sequence[int], strides: Sequence[int], level: int = 0) -> nn.Module:
+        def _create_block(channels: Sequence[int], strides: Sequence[int]) -> nn.Module:
             if len(channels) > 2:
-                subblock = _create_block(channels[1:], strides[1:], level=level + 1)
+                subblock = _create_block(channels[1:], strides[1:])
                 return AttentionLayer(
                     spatial_dims=spatial_dims,
                     in_channels=channels[0],
@@ -227,17 +242,19 @@ def _create_block(channels: Sequence[int], strides: Sequence[int], level: int =
                         ),
                         subblock,
                     ),
+                    up_kernel_size=self.up_kernel_size,
+                    strides=strides[0],
                     dropout=dropout,
                 )
             else:
                 # the next layer is the bottom so stop recursion,
-                # create the bottom layer as the sublock for this layer
-                return self._get_bottom_layer(channels[0], channels[1], strides[0], level=level + 1)
+                # create the bottom layer as the subblock for this layer
+                return self._get_bottom_layer(channels[0], channels[1], strides[0])
 
         encdec = _create_block(self.channels, self.strides)
         self.model = nn.Sequential(head, encdec, reduce_channels)
 
-    def _get_bottom_layer(self, in_channels: int, out_channels: int, strides: int, level: int) -> nn.Module:
+    def _get_bottom_layer(self, in_channels: int, out_channels: int, strides: int) -> nn.Module:
         return AttentionLayer(
             spatial_dims=self.dimensions,
             in_channels=in_channels,
@@ -249,6 +266,8 @@ def _get_bottom_layer(self, in_channels: int, out_channels: int, strides: int, l
                 strides=strides,
                 dropout=self.dropout,
             ),
+            up_kernel_size=self.up_kernel_size,
+            strides=strides,
             dropout=self.dropout,
         )
 
diff --git a/monai/transforms/utils.py b/monai/transforms/utils.py
@@ -505,11 +505,11 @@ def generate_pos_neg_label_crop_centers(
         raise ValueError("No sampling location available.")
 
     if len(fg_indices) == 0 or len(bg_indices) == 0:
+        pos_ratio = 0 if len(fg_indices) == 0 else 1
         warnings.warn(
-            f"N foreground {len(fg_indices)}, N  background {len(bg_indices)},"
-            "unable to generate class balanced samples."
+            f"Num foregrounds {len(fg_indices)}, Num backgrounds {len(bg_indices)}, "
+            f"unable to generate class balanced samples, setting `pos_ratio` to {pos_ratio}."
         )
-        pos_ratio = 0 if fg_indices.size == 0 else 1
 
     for _ in range(num_samples):
         indices_to_use = fg_indices if rand_state.rand() < pos_ratio else bg_indices
diff --git a/monai/visualize/gradient_based.py b/monai/visualize/gradient_based.py
@@ -90,7 +90,7 @@ def get_grad(self, x: torch.Tensor, index: torch.Tensor | int | None, retain_gra
         x.requires_grad = True
 
         self._model(x, class_idx=index, retain_graph=retain_graph, **kwargs)
-        grad: torch.Tensor = x.grad.detach()
+        grad: torch.Tensor = x.grad.detach()  # type: ignore
         return grad
 
     def __call__(self, x: torch.Tensor, index: torch.Tensor | int | None = None, **kwargs) -> torch.Tensor:
diff --git a/tests/test_attentionunet.py b/tests/test_attentionunet.py
@@ -39,7 +39,7 @@ def test_attentionunet(self):
             shape = (3, 1) + (92,) * dims
             input = torch.rand(*shape)
             model = att.AttentionUnet(
-                spatial_dims=dims, in_channels=1, out_channels=2, channels=(3, 4, 5), strides=(2, 2)
+                spatial_dims=dims, in_channels=1, out_channels=2, channels=(3, 4, 5), up_kernel_size=5, strides=(1, 2)
             )
             output = model(input)
             self.assertEqual(output.shape[2:], input.shape[2:])
diff --git a/tests/test_dice_focal_loss.py b/tests/test_dice_focal_loss.py
@@ -13,6 +13,7 @@
 
 import numpy as np
 import torch
+from parameterized import parameterized
 
 from monai.losses import DiceFocalLoss, DiceLoss, FocalLoss
 from tests.utils import test_script_save
@@ -36,17 +37,24 @@ def test_result_onehot_target_include_bg(self):
                     expected_val = dice(pred, label) + lambda_focal * focal(pred, label)
                     np.testing.assert_allclose(result, expected_val)
 
-    def test_result_no_onehot_no_bg(self):
-        size = [3, 3, 5, 5]
-        label = torch.randint(low=0, high=2, size=size)
-        label = torch.argmax(label, dim=1, keepdim=True)
+    @parameterized.expand([[[3, 3, 5, 5], True], [[3, 2, 5, 5], False]])
+    def test_result_no_onehot_no_bg(self, size, onehot):
+        label = torch.randint(low=0, high=size[1] - 1, size=size)
+        if onehot:
+            label = torch.argmax(label, dim=1, keepdim=True)
         pred = torch.randn(size)
         for reduction in ["sum", "mean", "none"]:
-            common_params = {"include_background": False, "to_onehot_y": True, "reduction": reduction}
-            for focal_weight in [2.0, torch.tensor([1.0, 2.0]), (2.0, 1)]:
+            for focal_weight in [2.0] + [] if size[1] != 3 else [torch.tensor([1.0, 2.0]), (2.0, 1)]:
                 for lambda_focal in [0.5, 1.0, 1.5]:
+                    common_params = {
+                        "include_background": False,
+                        "softmax": True,
+                        "to_onehot_y": onehot,
+                        "reduction": reduction,
+                    }
                     dice_focal = DiceFocalLoss(focal_weight=focal_weight, lambda_focal=lambda_focal, **common_params)
                     dice = DiceLoss(**common_params)
+                    common_params.pop("softmax", None)
                     focal = FocalLoss(weight=focal_weight, **common_params)
                     result = dice_focal(pred, label)
                     expected_val = dice(pred, label) + lambda_focal * focal(pred, label)
diff --git a/tests/test_generate_pos_neg_label_crop_centers.py b/tests/test_generate_pos_neg_label_crop_centers.py
@@ -31,7 +31,20 @@
         list,
         2,
         3,
-    ]
+    ],
+    [
+        {
+            "spatial_size": [2, 2, 2],
+            "num_samples": 2,
+            "pos_ratio": 0.0,
+            "label_spatial_shape": [3, 3, 3],
+            "fg_indices": [],
+            "bg_indices": [3, 12, 21],
+        },
+        list,
+        2,
+        3,
+    ],
 ]
 
 
diff --git a/tests/test_nifti_rw.py b/tests/test_nifti_rw.py
diff --git a/tests/test_randomizable_transform_type.py b/tests/test_randomizable_transform_type.py

Original file line number	Diff line number	Diff line change
`@@ -39,7 +39,7 @@ def test_attentionunet(self):`
`39`	`39`	`shape = (3, 1) + (92,) * dims`
`40`	`40`	`input = torch.rand(*shape)`
`41`	`41`	`model = att.AttentionUnet(`
`42`		`- spatial_dims=dims, in_channels=1, out_channels=2, channels=(3, 4, 5), strides=(2, 2)`
	`42`	`+ spatial_dims=dims, in_channels=1, out_channels=2, channels=(3, 4, 5), up_kernel_size=5, strides=(1, 2)`
`43`	`43`	`)`
`44`	`44`	`output = model(input)`
`45`	`45`	`self.assertEqual(output.shape[2:], input.shape[2:])`