Fix correctness of some group matching regex (no impact on result), some formatting, missed forward_head for resnet
rwightman committed Mar 19, 2022
1 parent 94bcdeb commit 0862e6e
Showing 31 changed files with 120 additions and 118 deletions.
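The core of this change is escaping literal dots in the parameter-grouping regexes: an unescaped '.' matches any character, so the old patterns were looser than intended, even though real parameter names never produced a wrong grouping (hence "no impact on result"). A minimal sketch of the difference, using a hypothetical parameter name rather than one taken from this diff:

import re

# Hypothetical parameter name of the kind produced by nn.Module.named_parameters().
name = "stages.0.1.conv1.conv.weight"

loose = re.match(r'^stages.(\d+).(\d+)', name)     # old style: '.' matches any character
strict = re.match(r'^stages\.(\d+)\.(\d+)', name)  # new style: '\.' matches a literal dot
print(loose.groups(), strict.groups())             # both ('0', '1') for well-formed names

# The unescaped pattern also matches a malformed name, which is why the escaped
# form is more correct even though results do not change in practice.
print(bool(re.match(r'^stages.(\d+).(\d+)', "stagesX0Y1")))    # True
print(bool(re.match(r'^stages\.(\d+)\.(\d+)', "stagesX0Y1")))  # False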
28 changes: 16 additions & 12 deletions timm/models/byobnet.py
@@ -1004,9 +1004,10 @@ class BottleneckBlock(nn.Module):
""" ResNet-like Bottleneck Block - 1x1 - kxk - 1x1
"""

- def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1., group_size=None,
- downsample='avg', attn_last=False, linear_out=False, extra_conv=False, bottle_in=False,
- layers: LayerFn = None, drop_block=None, drop_path_rate=0.):
+ def __init__(
+ self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1., group_size=None,
+ downsample='avg', attn_last=False, linear_out=False, extra_conv=False, bottle_in=False,
+ layers: LayerFn = None, drop_block=None, drop_path_rate=0.):
super(BottleneckBlock, self).__init__()
layers = layers or LayerFn()
mid_chs = make_divisible((in_chs if bottle_in else out_chs) * bottle_ratio)
@@ -1061,9 +1062,10 @@ class DarkBlock(nn.Module):
for more optimal compute.
"""

- def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
- downsample='avg', attn_last=True, linear_out=False, layers: LayerFn = None, drop_block=None,
- drop_path_rate=0.):
+ def __init__(
+ self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
+ downsample='avg', attn_last=True, linear_out=False, layers: LayerFn = None, drop_block=None,
+ drop_path_rate=0.):
super(DarkBlock, self).__init__()
layers = layers or LayerFn()
mid_chs = make_divisible(out_chs * bottle_ratio)
@@ -1111,9 +1113,10 @@ class EdgeBlock(nn.Module):
FIXME is there a more common 3x3 + 1x1 conv block to name this after?
"""

- def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
- downsample='avg', attn_last=False, linear_out=False, layers: LayerFn = None,
- drop_block=None, drop_path_rate=0.):
+ def __init__(
+ self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
+ downsample='avg', attn_last=False, linear_out=False, layers: LayerFn = None,
+ drop_block=None, drop_path_rate=0.):
super(EdgeBlock, self).__init__()
layers = layers or LayerFn()
mid_chs = make_divisible(out_chs * bottle_ratio)
@@ -1158,8 +1161,9 @@ class RepVggBlock(nn.Module):
This version does not currently support the deploy optimization. It is currently fixed in 'train' mode.
"""

- def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
- downsample='', layers: LayerFn = None, drop_block=None, drop_path_rate=0.):
+ def __init__(
+ self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
+ downsample='', layers: LayerFn = None, drop_block=None, drop_path_rate=0.):
super(RepVggBlock, self).__init__()
layers = layers or LayerFn()
groups = num_groups(group_size, in_chs)
@@ -1522,7 +1526,7 @@ def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^stem',
blocks=[
- (r'^stages\.(\d+)' if coarse else r'^stages\.(\d+).(\d+)', None),
+ (r'^stages\.(\d+)' if coarse else r'^stages\.(\d+)\.(\d+)', None),
(r'^final_conv', (99999,))
]
)
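For context, the group_matcher dicts edited throughout this commit feed timm's parameter-grouping helpers (used, for example, for layer-wise learning-rate decay). The sketch below is only an illustrative approximation of how such a matcher could map parameter names to group ids; it is not timm's actual helper, and the parameter names are made up.

import re

def sketch_group_id(name, matcher):
    """Rough illustration: map a parameter name to a group id with a matcher dict
    shaped like the one above. Not the real timm implementation."""
    if re.match(matcher['stem'], name):
        return ('stem',)
    for pattern, explicit in matcher['blocks']:
        m = re.match(pattern, name)
        if m:
            # An explicit id such as (99999,) pins the match to a fixed (final) group;
            # otherwise the captured stage/block indices are used.
            return explicit if explicit is not None else tuple(int(g) for g in m.groups())
    return ('other',)

matcher = dict(
    stem=r'^stem',
    blocks=[
        (r'^stages\.(\d+)\.(\d+)', None),
        (r'^final_conv', (99999,)),
    ],
)
print(sketch_group_id('stem.conv1.weight', matcher))        # ('stem',)
print(sketch_group_id('stages.2.0.conv1.weight', matcher))  # (2, 0)
print(sketch_group_id('final_conv.weight', matcher))        # (99999,)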
5 changes: 3 additions & 2 deletions timm/models/crossvit.py
@@ -164,8 +164,9 @@ def forward(self, x):

class CrossAttentionBlock(nn.Module):

- def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
- drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
+ def __init__(
+ self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
+ drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
super().__init__()
self.norm1 = norm_layer(dim)
self.attn = CrossAttention(
34 changes: 19 additions & 15 deletions timm/models/cspnet.py
@@ -157,9 +157,10 @@ class ResBottleneck(nn.Module):
""" ResNe(X)t Bottleneck Block
"""

- def __init__(self, in_chs, out_chs, dilation=1, bottle_ratio=0.25, groups=1,
- act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, attn_last=False,
- attn_layer=None, aa_layer=None, drop_block=None, drop_path=None):
+ def __init__(
+ self, in_chs, out_chs, dilation=1, bottle_ratio=0.25, groups=1,
+ act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, attn_last=False,
+ attn_layer=None, aa_layer=None, drop_block=None, drop_path=None):
super(ResBottleneck, self).__init__()
mid_chs = int(round(out_chs * bottle_ratio))
ckwargs = dict(act_layer=act_layer, norm_layer=norm_layer)
@@ -199,9 +200,10 @@ class DarkBlock(nn.Module):
""" DarkNet Block
"""

- def __init__(self, in_chs, out_chs, dilation=1, bottle_ratio=0.5, groups=1,
- act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, attn_layer=None, aa_layer=None,
- drop_block=None, drop_path=None):
+ def __init__(
+ self, in_chs, out_chs, dilation=1, bottle_ratio=0.5, groups=1,
+ act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, attn_layer=None, aa_layer=None,
+ drop_block=None, drop_path=None):
super(DarkBlock, self).__init__()
mid_chs = int(round(out_chs * bottle_ratio))
ckwargs = dict(act_layer=act_layer, norm_layer=norm_layer)
@@ -229,9 +231,10 @@ def forward(self, x):

class CrossStage(nn.Module):
"""Cross Stage."""
- def __init__(self, in_chs, out_chs, stride, dilation, depth, block_ratio=1., bottle_ratio=1., exp_ratio=1.,
- groups=1, first_dilation=None, down_growth=False, cross_linear=False, block_dpr=None,
- block_fn=ResBottleneck, **block_kwargs):
+ def __init__(
+ self, in_chs, out_chs, stride, dilation, depth, block_ratio=1., bottle_ratio=1., exp_ratio=1.,
+ groups=1, first_dilation=None, down_growth=False, cross_linear=False, block_dpr=None,
+ block_fn=ResBottleneck, **block_kwargs):
super(CrossStage, self).__init__()
first_dilation = first_dilation or dilation
down_chs = out_chs if down_growth else in_chs # grow downsample channels to output channels
@@ -280,8 +283,9 @@ def forward(self, x):
class DarkStage(nn.Module):
"""DarkNet stage."""

- def __init__(self, in_chs, out_chs, stride, dilation, depth, block_ratio=1., bottle_ratio=1., groups=1,
- first_dilation=None, block_fn=ResBottleneck, block_dpr=None, **block_kwargs):
+ def __init__(
+ self, in_chs, out_chs, stride, dilation, depth, block_ratio=1., bottle_ratio=1., groups=1,
+ first_dilation=None, block_fn=ResBottleneck, block_dpr=None, **block_kwargs):
super(DarkStage, self).__init__()
first_dilation = first_dilation or dilation

@@ -387,10 +391,10 @@ def __init__(
def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^stem',
- blocks=r'^stages.(\d+)' if coarse else [
- (r'^stages.(\d+).blocks.(\d+)', None),
- (r'^stages.(\d+).*transition', MATCH_PREV_GROUP), # map to last block in stage
- (r'^stages.(\d+)', (0,)),
+ blocks=r'^stages\.(\d+)' if coarse else [
+ (r'^stages\.(\d+)\.blocks\.(\d+)', None),
+ (r'^stages\.(\d+)\..*transition', MATCH_PREV_GROUP), # map to last block in stage
+ (r'^stages\.(\d+)', (0,)),
]
)
return matcher
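The cspnet matcher above relies on first-match-wins ordering: per-block names are tried first, then transition layers (tagged MATCH_PREV_GROUP, which the comment maps to the last block of the stage), then a per-stage fallback. A rough illustration with made-up parameter names:

import re

# Made-up parameter names; the patterns are the escaped ones from the diff above.
patterns = [
    r'^stages\.(\d+)\.blocks\.(\d+)',  # per-block
    r'^stages\.(\d+)\..*transition',   # transition layers (MATCH_PREV_GROUP)
    r'^stages\.(\d+)',                 # per-stage fallback
]

for name in ('stages.1.blocks.3.conv1.weight',
             'stages.1.conv_transition.weight',
             'stages.1.conv_down.weight'):
    idx = next(i for i, p in enumerate(patterns) if re.match(p, name))
    print(f'{name} -> pattern {idx}')
# stages.1.blocks.3.conv1.weight -> pattern 0
# stages.1.conv_transition.weight -> pattern 1
# stages.1.conv_down.weight -> pattern 2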
2 changes: 1 addition & 1 deletion timm/models/deit.py
@@ -85,7 +85,7 @@ def group_matcher(self, coarse=False):
return dict(
stem=r'^cls_token|pos_embed|patch_embed|dist_token',
blocks=[
- (r'^blocks.(\d+)', None),
+ (r'^blocks\.(\d+)', None),
(r'^norm', (99999,))] # final norm w/ last block
)

22 changes: 12 additions & 10 deletions timm/models/densenet.py
@@ -45,8 +45,9 @@ def _cfg(url=''):


class DenseLayer(nn.Module):
- def __init__(self, num_input_features, growth_rate, bn_size, norm_layer=BatchNormAct2d,
- drop_rate=0., memory_efficient=False):
+ def __init__(
+ self, num_input_features, growth_rate, bn_size, norm_layer=BatchNormAct2d,
+ drop_rate=0., memory_efficient=False):
super(DenseLayer, self).__init__()
self.add_module('norm1', norm_layer(num_input_features)),
self.add_module('conv1', nn.Conv2d(
@@ -113,8 +114,9 @@ def forward(self, x): # noqa: F811
class DenseBlock(nn.ModuleDict):
_version = 2

- def __init__(self, num_layers, num_input_features, bn_size, growth_rate, norm_layer=nn.ReLU,
- drop_rate=0., memory_efficient=False):
+ def __init__(
+ self, num_layers, num_input_features, bn_size, growth_rate, norm_layer=nn.ReLU,
+ drop_rate=0., memory_efficient=False):
super(DenseBlock, self).__init__()
for i in range(num_layers):
layer = DenseLayer(
@@ -164,8 +166,8 @@ class DenseNet(nn.Module):

def __init__(
self, growth_rate=32, block_config=(6, 12, 24, 16), num_classes=1000, in_chans=3, global_pool='avg',
- bn_size=4, stem_type='', norm_layer=BatchNormAct2d, aa_layer=None, drop_rate=0, memory_efficient=False,
- aa_stem_only=True):
+ bn_size=4, stem_type='', norm_layer=BatchNormAct2d, aa_layer=None, drop_rate=0,
+ memory_efficient=False, aa_stem_only=True):
self.num_classes = num_classes
self.drop_rate = drop_rate
super(DenseNet, self).__init__()
@@ -252,10 +254,10 @@ def __init__(
@torch.jit.ignore
def group_matcher(self, coarse=False):
matcher = dict(
- stem=r'^features.conv[012]|features.norm[012]|features.pool[012]',
- blocks=r'^features.(?:denseblock|transition)(\d+)' if coarse else [
- (r'^features.denseblock(\d+).denselayer(\d+)', None),
- (r'^features.transition(\d+)', MATCH_PREV_GROUP) # FIXME combine with previous denselayer
+ stem=r'^features\.conv[012]|features\.norm[012]|features\.pool[012]',
+ blocks=r'^features\.(?:denseblock|transition)(\d+)' if coarse else [
+ (r'^features\.denseblock(\d+)\.denselayer(\d+)', None),
+ (r'^features\.transition(\d+)', MATCH_PREV_GROUP) # FIXME combine with previous denselayer
]
)
return matcher
4 changes: 2 additions & 2 deletions timm/models/dla.py
@@ -323,8 +323,8 @@ def group_matcher(self, coarse=False):
stem=r'^base_layer',
blocks=r'^level(\d+)' if coarse else [
# an unusual arch, this achieves somewhat more granularity without getting super messy
- (r'^level(\d+).tree(\d+)', None),
- (r'^level(\d+).root', (2,)),
+ (r'^level(\d+)\.tree(\d+)', None),
+ (r'^level(\d+)\.root', (2,)),
(r'^level(\d+)', (1,))
]
)
6 changes: 3 additions & 3 deletions timm/models/dpn.py
@@ -243,10 +243,10 @@ def __init__(
@torch.jit.ignore
def group_matcher(self, coarse=False):
matcher = dict(
- stem=r'^features.conv1',
+ stem=r'^features\.conv1',
blocks=[
- (r'^features.conv(\d+)' if coarse else r'^features.conv(\d+)_(\d+)', None),
- (r'^features.conv5_bn_ac', (99999,))
+ (r'^features\.conv(\d+)' if coarse else r'^features\.conv(\d+)_(\d+)', None),
+ (r'^features\.conv5_bn_ac', (99999,))
]
)
return matcher
2 changes: 1 addition & 1 deletion timm/models/efficientnet.py
@@ -518,7 +518,7 @@ def group_matcher(self, coarse=False):
return dict(
stem=r'^conv_stem|bn1',
blocks=[
- (r'^blocks.(\d+)' if coarse else r'^blocks.(\d+).(\d+)', None),
+ (r'^blocks\.(\d+)' if coarse else r'^blocks\.(\d+)\.(\d+)', None),
(r'conv_head|bn2', (99999,))
]
)
2 changes: 1 addition & 1 deletion timm/models/ghostnet.py
@@ -193,7 +193,7 @@ def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^conv_stem|bn1',
blocks=[
- (r'^blocks.(\d+)' if coarse else r'^blocks.(\d+).(\d+)', None),
+ (r'^blocks\.(\d+)' if coarse else r'^blocks\.(\d+)\.(\d+)', None),
(r'conv_head', (99999,))
]
)
2 changes: 1 addition & 1 deletion timm/models/gluon_xception.py
@@ -184,7 +184,7 @@ def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^conv[12]|bn[12]',
blocks=[
- (r'^mid.block(\d+)', None),
+ (r'^mid\.block(\d+)', None),
(r'^block(\d+)', None),
(r'^conv[345]|bn[345]', (99,)),
],
4 changes: 2 additions & 2 deletions timm/models/hrnet.py
@@ -686,8 +686,8 @@ def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^conv[12]|bn[12]',
blocks=r'^(?:layer|stage|transition)(\d+)' if coarse else [
- (r'^layer(\d+).(\d+)', None),
- (r'^stage(\d+).(\d+)', None),
+ (r'^layer(\d+)\.(\d+)', None),
+ (r'^stage(\d+)\.(\d+)', None),
(r'^transition(\d+)', (99999,)),
],
)
2 changes: 1 addition & 1 deletion timm/models/levit.py
@@ -496,7 +496,7 @@ def no_weight_decay(self):
def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^cls_token|pos_embed|patch_embed', # stem and embed
- blocks=[(r'^blocks.(\d+)', None), (r'^norm', (99999,))]
+ blocks=[(r'^blocks\.(\d+)', None), (r'^norm', (99999,))]
)
return matcher

2 changes: 1 addition & 1 deletion timm/models/mlp_mixer.py
@@ -291,7 +291,7 @@ def init_weights(self, nlhb=False):
def group_matcher(self, coarse=False):
return dict(
stem=r'^stem', # stem and embed
- blocks=[(r'^blocks.(\d+)', None), (r'^norm', (99999,))]
+ blocks=[(r'^blocks\.(\d+)', None), (r'^norm', (99999,))]
)

@torch.jit.ignore
2 changes: 1 addition & 1 deletion timm/models/mobilenetv3.py
@@ -171,7 +171,7 @@ def as_sequential(self):
def group_matcher(self, coarse=False):
return dict(
stem=r'^conv_stem|bn1',
- blocks=r'^blocks.(\d+)' if coarse else r'^blocks.(\d+).(\d+)'
+ blocks=r'^blocks\.(\d+)' if coarse else r'^blocks\.(\d+)\.(\d+)'
)

@torch.jit.ignore
4 changes: 2 additions & 2 deletions timm/models/nest.py
@@ -334,8 +334,8 @@ def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^patch_embed', # stem and embed
blocks=[
- (r'^levels.(\d+)' if coarse else r'^levels.(\d+).transformer_encoder.(\d+)', None),
- (r'^levels.(\d+).(?:pool|pos_embed)', (0,)),
+ (r'^levels\.(\d+)' if coarse else r'^levels\.(\d+)\.transformer_encoder\.(\d+)', None),
+ (r'^levels\.(\d+)\.(?:pool|pos_embed)', (0,)),
(r'^norm', (99999,))
]
)
3 changes: 1 addition & 2 deletions timm/models/nfnet.py
@@ -194,7 +194,6 @@ def _dm_nfnet_cfg(depths, channels=(256, 512, 1536, 1536), act_layer='gelu', ski
return cfg


-
model_cfgs = dict(
# NFNet-F models w/ GELU compatible with DeepMind weights
dm_nfnet_f0=_dm_nfnet_cfg(depths=(1, 2, 6, 3)),
@@ -550,7 +549,7 @@ def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^stem',
blocks=[
- (r'^stages.(\d+)' if coarse else r'^stages.(\d+).(\d+)', None),
+ (r'^stages\.(\d+)' if coarse else r'^stages\.(\d+)\.(\d+)', None),
(r'^final_conv', (99999,))
]
)
2 changes: 1 addition & 1 deletion timm/models/regnet.py
@@ -458,7 +458,7 @@ def _get_stage_args(self, cfg: RegNetCfg, default_stride=2, output_stride=32, dr
def group_matcher(self, coarse=False):
return dict(
stem=r'^stem',
- blocks=r'^stages.(\d+)' if coarse else r'^stages.(\d+).blocks.(\d+)',
+ blocks=r'^stages\.(\d+)' if coarse else r'^stages\.(\d+)\.blocks\.(\d+)',
)

@torch.jit.ignore
[Diffs for the remaining changed files are not shown.]