Fix correctness of some group matching regex (no impact on result), some formatting, missed forward_head for resnet
rwightman committed Mar 19, 2022
1 parent 94bcdeb commit 0862e6e
Showing 31 changed files with 120 additions and 118 deletions.
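The core of this change is escaping literal dots in the parameter-grouping regexes: an unescaped '.' matches any character, so the old patterns were looser than intended, even though real parameter names never produced a wrong grouping (hence "no impact on result"). A minimal sketch of the difference, using a hypothetical parameter name rather than one taken from this diff:

import re

# Hypothetical parameter name of the kind produced by nn.Module.named_parameters().
name = "stages.0.1.conv1.conv.weight"

loose = re.match(r'^stages.(\d+).(\d+)', name)     # old style: '.' matches any character
strict = re.match(r'^stages\.(\d+)\.(\d+)', name)  # new style: '\.' matches a literal dot
print(loose.groups(), strict.groups())             # both ('0', '1') for well-formed names

# The unescaped pattern also matches a malformed name, which is why the escaped
# form is more correct even though results do not change in practice.
print(bool(re.match(r'^stages.(\d+).(\d+)', "stagesX0Y1")))    # True
print(bool(re.match(r'^stages\.(\d+)\.(\d+)', "stagesX0Y1")))  # False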
28 changes: 16 additions & 12 deletions timm/models/byobnet.py
@@ -1004,9 +1004,10 @@ class BottleneckBlock(nn.Module):
""" ResNet-like Bottleneck Block - 1x1 - kxk - 1x1
"""

- def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1., group_size=None,
- downsample='avg', attn_last=False, linear_out=False, extra_conv=False, bottle_in=False,
- layers: LayerFn = None, drop_block=None, drop_path_rate=0.):
+ def __init__(
+ self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1., group_size=None,
+ downsample='avg', attn_last=False, linear_out=False, extra_conv=False, bottle_in=False,
+ layers: LayerFn = None, drop_block=None, drop_path_rate=0.):
super(BottleneckBlock, self).__init__()
layers = layers or LayerFn()
mid_chs = make_divisible((in_chs if bottle_in else out_chs) * bottle_ratio)
@@ -1061,9 +1062,10 @@ class DarkBlock(nn.Module):
for more optimal compute.
"""

- def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
- downsample='avg', attn_last=True, linear_out=False, layers: LayerFn = None, drop_block=None,
- drop_path_rate=0.):
+ def __init__(
+ self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
+ downsample='avg', attn_last=True, linear_out=False, layers: LayerFn = None, drop_block=None,
+ drop_path_rate=0.):
super(DarkBlock, self).__init__()
layers = layers or LayerFn()
mid_chs = make_divisible(out_chs * bottle_ratio)
@@ -1111,9 +1113,10 @@ class EdgeBlock(nn.Module):
FIXME is there a more common 3x3 + 1x1 conv block to name this after?
"""

- def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
- downsample='avg', attn_last=False, linear_out=False, layers: LayerFn = None,
- drop_block=None, drop_path_rate=0.):
+ def __init__(
+ self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
+ downsample='avg', attn_last=False, linear_out=False, layers: LayerFn = None,
+ drop_block=None, drop_path_rate=0.):
super(EdgeBlock, self).__init__()
layers = layers or LayerFn()
mid_chs = make_divisible(out_chs * bottle_ratio)
@@ -1158,8 +1161,9 @@ class RepVggBlock(nn.Module):
This version does not currently support the deploy optimization. It is currently fixed in 'train' mode.
"""

- def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
- downsample='', layers: LayerFn = None, drop_block=None, drop_path_rate=0.):
+ def __init__(
+ self, in_chs, out_chs, kernel_size=3, stride=1, dilation=(1, 1), bottle_ratio=1.0, group_size=None,
+ downsample='', layers: LayerFn = None, drop_block=None, drop_path_rate=0.):
super(RepVggBlock, self).__init__()
layers = layers or LayerFn()
groups = num_groups(group_size, in_chs)
@@ -1522,7 +1526,7 @@ def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^stem',
blocks=[
- (r'^stages\.(\d+)' if coarse else r'^stages\.(\d+).(\d+)', None),
+ (r'^stages\.(\d+)' if coarse else r'^stages\.(\d+)\.(\d+)', None),
(r'^final_conv', (99999,))
]
)
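For context, the group_matcher dicts edited throughout this commit feed timm's parameter-grouping helpers (used, for example, for layer-wise learning-rate decay). The sketch below is only an illustrative approximation of how such a matcher could map parameter names to group ids; it is not timm's actual helper, and the parameter names are made up.

import re

def sketch_group_id(name, matcher):
    """Rough illustration: map a parameter name to a group id with a matcher dict
    shaped like the one above. Not the real timm implementation."""
    if re.match(matcher['stem'], name):
        return ('stem',)
    for pattern, explicit in matcher['blocks']:
        m = re.match(pattern, name)
        if m:
            # An explicit id such as (99999,) pins the match to a fixed (final) group;
            # otherwise the captured stage/block indices are used.
            return explicit if explicit is not None else tuple(int(g) for g in m.groups())
    return ('other',)

matcher = dict(
    stem=r'^stem',
    blocks=[
        (r'^stages\.(\d+)\.(\d+)', None),
        (r'^final_conv', (99999,)),
    ],
)
print(sketch_group_id('stem.conv1.weight', matcher))        # ('stem',)
print(sketch_group_id('stages.2.0.conv1.weight', matcher))  # (2, 0)
print(sketch_group_id('final_conv.weight', matcher))        # (99999,)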
5 changes: 3 additions & 2 deletions timm/models/crossvit.py
@@ -164,8 +164,9 @@ def forward(self, x):

class CrossAttentionBlock(nn.Module):

- def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
- drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
+ def __init__(
+ self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
+ drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
super().__init__()
self.norm1 = norm_layer(dim)
self.attn = CrossAttention(
34 changes: 19 additions & 15 deletions timm/models/cspnet.py
@@ -157,9 +157,10 @@ class ResBottleneck(nn.Module):
""" ResNe(X)t Bottleneck Block
"""

- def __init__(self, in_chs, out_chs, dilation=1, bottle_ratio=0.25, groups=1,
- act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, attn_last=False,
- attn_layer=None, aa_layer=None, drop_block=None, drop_path=None):
+ def __init__(
+ self, in_chs, out_chs, dilation=1, bottle_ratio=0.25, groups=1,
+ act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, attn_last=False,
+ attn_layer=None, aa_layer=None, drop_block=None, drop_path=None):
super(ResBottleneck, self).__init__()
mid_chs = int(round(out_chs * bottle_ratio))
ckwargs = dict(act_layer=act_layer, norm_layer=norm_layer)
@@ -199,9 +200,10 @@ class DarkBlock(nn.Module):
""" DarkNet Block
"""

- def __init__(self, in_chs, out_chs, dilation=1, bottle_ratio=0.5, groups=1,
- act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, attn_layer=None, aa_layer=None,
- drop_block=None, drop_path=None):
+ def __init__(
+ self, in_chs, out_chs, dilation=1, bottle_ratio=0.5, groups=1,
+ act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, attn_layer=None, aa_layer=None,
+ drop_block=None, drop_path=None):
super(DarkBlock, self).__init__()
mid_chs = int(round(out_chs * bottle_ratio))
ckwargs = dict(act_layer=act_layer, norm_layer=norm_layer)
@@ -229,9 +231,10 @@ def forward(self, x):

class CrossStage(nn.Module):
"""Cross Stage."""
- def __init__(self, in_chs, out_chs, stride, dilation, depth, block_ratio=1., bottle_ratio=1., exp_ratio=1.,
- groups=1, first_dilation=None, down_growth=False, cross_linear=False, block_dpr=None,
- block_fn=ResBottleneck, **block_kwargs):
+ def __init__(
+ self, in_chs, out_chs, stride, dilation, depth, block_ratio=1., bottle_ratio=1., exp_ratio=1.,
+ groups=1, first_dilation=None, down_growth=False, cross_linear=False, block_dpr=None,
+ block_fn=ResBottleneck, **block_kwargs):
super(CrossStage, self).__init__()
first_dilation = first_dilation or dilation
down_chs = out_chs if down_growth else in_chs # grow downsample channels to output channels
@@ -280,8 +283,9 @@ def forward(self, x):
class DarkStage(nn.Module):
"""DarkNet stage."""

- def __init__(self, in_chs, out_chs, stride, dilation, depth, block_ratio=1., bottle_ratio=1., groups=1,
- first_dilation=None, block_fn=ResBottleneck, block_dpr=None, **block_kwargs):
+ def __init__(
+ self, in_chs, out_chs, stride, dilation, depth, block_ratio=1., bottle_ratio=1., groups=1,
+ first_dilation=None, block_fn=ResBottleneck, block_dpr=None, **block_kwargs):
super(DarkStage, self).__init__()
first_dilation = first_dilation or dilation

@@ -387,10 +391,10 @@ def __init__(
def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^stem',
- blocks=r'^stages.(\d+)' if coarse else [
- (r'^stages.(\d+).blocks.(\d+)', None),
- (r'^stages.(\d+).*transition', MATCH_PREV_GROUP), # map to last block in stage
- (r'^stages.(\d+)', (0,)),
+ blocks=r'^stages\.(\d+)' if coarse else [
+ (r'^stages\.(\d+)\.blocks\.(\d+)', None),
+ (r'^stages\.(\d+)\..*transition', MATCH_PREV_GROUP), # map to last block in stage
+ (r'^stages\.(\d+)', (0,)),
]
)
return matcher
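The cspnet matcher above relies on first-match-wins ordering: per-block names are tried first, then transition layers (tagged MATCH_PREV_GROUP, which the comment maps to the last block of the stage), then a per-stage fallback. A rough illustration with made-up parameter names:

import re

# Made-up parameter names; the patterns are the escaped ones from the diff above.
patterns = [
    r'^stages\.(\d+)\.blocks\.(\d+)',  # per-block
    r'^stages\.(\d+)\..*transition',   # transition layers (MATCH_PREV_GROUP)
    r'^stages\.(\d+)',                 # per-stage fallback
]

for name in ('stages.1.blocks.3.conv1.weight',
             'stages.1.conv_transition.weight',
             'stages.1.conv_down.weight'):
    idx = next(i for i, p in enumerate(patterns) if re.match(p, name))
    print(f'{name} -> pattern {idx}')
# stages.1.blocks.3.conv1.weight -> pattern 0
# stages.1.conv_transition.weight -> pattern 1
# stages.1.conv_down.weight -> pattern 2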
2 changes: 1 addition & 1 deletion timm/models/deit.py
@@ -85,7 +85,7 @@ def group_matcher(self, coarse=False):
return dict(
stem=r'^cls_token|pos_embed|patch_embed|dist_token',
blocks=[
- (r'^blocks.(\d+)', None),
+ (r'^blocks\.(\d+)', None),
(r'^norm', (99999,))] # final norm w/ last block
)

22 changes: 12 additions & 10 deletions timm/models/densenet.py
@@ -45,8 +45,9 @@ def _cfg(url=''):


class DenseLayer(nn.Module):
- def __init__(self, num_input_features, growth_rate, bn_size, norm_layer=BatchNormAct2d,
- drop_rate=0., memory_efficient=False):
+ def __init__(
+ self, num_input_features, growth_rate, bn_size, norm_layer=BatchNormAct2d,
+ drop_rate=0., memory_efficient=False):
super(DenseLayer, self).__init__()
self.add_module('norm1', norm_layer(num_input_features)),
self.add_module('conv1', nn.Conv2d(
@@ -113,8 +114,9 @@ def forward(self, x): # noqa: F811
class DenseBlock(nn.ModuleDict):
_version = 2

- def __init__(self, num_layers, num_input_features, bn_size, growth_rate, norm_layer=nn.ReLU,
- drop_rate=0., memory_efficient=False):
+ def __init__(
+ self, num_layers, num_input_features, bn_size, growth_rate, norm_layer=nn.ReLU,
+ drop_rate=0., memory_efficient=False):
super(DenseBlock, self).__init__()
for i in range(num_layers):
layer = DenseLayer(
@@ -164,8 +166,8 @@ class DenseNet(nn.Module):

def __init__(
self, growth_rate=32, block_config=(6, 12, 24, 16), num_classes=1000, in_chans=3, global_pool='avg',
- bn_size=4, stem_type='', norm_layer=BatchNormAct2d, aa_layer=None, drop_rate=0, memory_efficient=False,
- aa_stem_only=True):
+ bn_size=4, stem_type='', norm_layer=BatchNormAct2d, aa_layer=None, drop_rate=0,
+ memory_efficient=False, aa_stem_only=True):
self.num_classes = num_classes
self.drop_rate = drop_rate
super(DenseNet, self).__init__()
@@ -252,10 +254,10 @@ def __init__(
@torch.jit.ignore
def group_matcher(self, coarse=False):
matcher = dict(
- stem=r'^features.conv[012]|features.norm[012]|features.pool[012]',
- blocks=r'^features.(?:denseblock|transition)(\d+)' if coarse else [
- (r'^features.denseblock(\d+).denselayer(\d+)', None),
- (r'^features.transition(\d+)', MATCH_PREV_GROUP) # FIXME combine with previous denselayer
+ stem=r'^features\.conv[012]|features\.norm[012]|features\.pool[012]',
+ blocks=r'^features\.(?:denseblock|transition)(\d+)' if coarse else [
+ (r'^features\.denseblock(\d+)\.denselayer(\d+)', None),
+ (r'^features\.transition(\d+)', MATCH_PREV_GROUP) # FIXME combine with previous denselayer
]
)
return matcher
4 changes: 2 additions & 2 deletions timm/models/dla.py
@@ -323,8 +323,8 @@ def group_matcher(self, coarse=False):
stem=r'^base_layer',
blocks=r'^level(\d+)' if coarse else [
# an unusual arch, this achieves somewhat more granularity without getting super messy
- (r'^level(\d+).tree(\d+)', None),
- (r'^level(\d+).root', (2,)),
+ (r'^level(\d+)\.tree(\d+)', None),
+ (r'^level(\d+)\.root', (2,)),
(r'^level(\d+)', (1,))
]
)
6 changes: 3 additions & 3 deletions timm/models/dpn.py
@@ -243,10 +243,10 @@ def __init__(
@torch.jit.ignore
def group_matcher(self, coarse=False):
matcher = dict(
- stem=r'^features.conv1',
+ stem=r'^features\.conv1',
blocks=[
- (r'^features.conv(\d+)' if coarse else r'^features.conv(\d+)_(\d+)', None),
- (r'^features.conv5_bn_ac', (99999,))
+ (r'^features\.conv(\d+)' if coarse else r'^features\.conv(\d+)_(\d+)', None),
+ (r'^features\.conv5_bn_ac', (99999,))
]
)
return matcher
2 changes: 1 addition & 1 deletion timm/models/efficientnet.py
@@ -518,7 +518,7 @@ def group_matcher(self, coarse=False):
return dict(
stem=r'^conv_stem|bn1',
blocks=[
- (r'^blocks.(\d+)' if coarse else r'^blocks.(\d+).(\d+)', None),
+ (r'^blocks\.(\d+)' if coarse else r'^blocks\.(\d+)\.(\d+)', None),
(r'conv_head|bn2', (99999,))
]
)
2 changes: 1 addition & 1 deletion timm/models/ghostnet.py
@@ -193,7 +193,7 @@ def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^conv_stem|bn1',
blocks=[
- (r'^blocks.(\d+)' if coarse else r'^blocks.(\d+).(\d+)', None),
+ (r'^blocks\.(\d+)' if coarse else r'^blocks\.(\d+)\.(\d+)', None),
(r'conv_head', (99999,))
]
)
2 changes: 1 addition & 1 deletion timm/models/gluon_xception.py
@@ -184,7 +184,7 @@ def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^conv[12]|bn[12]',
blocks=[
- (r'^mid.block(\d+)', None),
+ (r'^mid\.block(\d+)', None),
(r'^block(\d+)', None),
(r'^conv[345]|bn[345]', (99,)),
],
4 changes: 2 additions & 2 deletions timm/models/hrnet.py
@@ -686,8 +686,8 @@ def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^conv[12]|bn[12]',
blocks=r'^(?:layer|stage|transition)(\d+)' if coarse else [
- (r'^layer(\d+).(\d+)', None),
- (r'^stage(\d+).(\d+)', None),
+ (r'^layer(\d+)\.(\d+)', None),
+ (r'^stage(\d+)\.(\d+)', None),
(r'^transition(\d+)', (99999,)),
],
)
2 changes: 1 addition & 1 deletion timm/models/levit.py
@@ -496,7 +496,7 @@ def no_weight_decay(self):
def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^cls_token|pos_embed|patch_embed', # stem and embed
- blocks=[(r'^blocks.(\d+)', None), (r'^norm', (99999,))]
+ blocks=[(r'^blocks\.(\d+)', None), (r'^norm', (99999,))]
)
return matcher

2 changes: 1 addition & 1 deletion timm/models/mlp_mixer.py
@@ -291,7 +291,7 @@ def init_weights(self, nlhb=False):
def group_matcher(self, coarse=False):
return dict(
stem=r'^stem', # stem and embed
- blocks=[(r'^blocks.(\d+)', None), (r'^norm', (99999,))]
+ blocks=[(r'^blocks\.(\d+)', None), (r'^norm', (99999,))]
)

@torch.jit.ignore
2 changes: 1 addition & 1 deletion timm/models/mobilenetv3.py
@@ -171,7 +171,7 @@ def as_sequential(self):
def group_matcher(self, coarse=False):
return dict(
stem=r'^conv_stem|bn1',
- blocks=r'^blocks.(\d+)' if coarse else r'^blocks.(\d+).(\d+)'
+ blocks=r'^blocks\.(\d+)' if coarse else r'^blocks\.(\d+)\.(\d+)'
)

@torch.jit.ignore
4 changes: 2 additions & 2 deletions timm/models/nest.py
@@ -334,8 +334,8 @@ def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^patch_embed', # stem and embed
blocks=[
- (r'^levels.(\d+)' if coarse else r'^levels.(\d+).transformer_encoder.(\d+)', None),
- (r'^levels.(\d+).(?:pool|pos_embed)', (0,)),
+ (r'^levels\.(\d+)' if coarse else r'^levels\.(\d+)\.transformer_encoder\.(\d+)', None),
+ (r'^levels\.(\d+)\.(?:pool|pos_embed)', (0,)),
(r'^norm', (99999,))
]
)
3 changes: 1 addition & 2 deletions timm/models/nfnet.py
@@ -194,7 +194,6 @@ def _dm_nfnet_cfg(depths, channels=(256, 512, 1536, 1536), act_layer='gelu', ski
return cfg


-
model_cfgs = dict(
# NFNet-F models w/ GELU compatible with DeepMind weights
dm_nfnet_f0=_dm_nfnet_cfg(depths=(1, 2, 6, 3)),
@@ -550,7 +549,7 @@ def group_matcher(self, coarse=False):
matcher = dict(
stem=r'^stem',
blocks=[
- (r'^stages.(\d+)' if coarse else r'^stages.(\d+).(\d+)', None),
+ (r'^stages\.(\d+)' if coarse else r'^stages\.(\d+)\.(\d+)', None),
(r'^final_conv', (99999,))
]
)
2 changes: 1 addition & 1 deletion timm/models/regnet.py
@@ -458,7 +458,7 @@ def _get_stage_args(self, cfg: RegNetCfg, default_stride=2, output_stride=32, dr
def group_matcher(self, coarse=False):
return dict(
stem=r'^stem',
- blocks=r'^stages.(\d+)' if coarse else r'^stages.(\d+).blocks.(\d+)',
+ blocks=r'^stages\.(\d+)' if coarse else r'^stages\.(\d+)\.blocks\.(\d+)',
)

@torch.jit.ignore
[Diffs for the remaining changed files are not shown.]