Merge branch 'master' into patch-1

lukemelas · web-flow · commit 75ca1bf51dd8 · 2021-04-15T11:16:30.000-04:00
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -0,0 +1,21 @@
+name: Workflow
+
+on:
+  push:
+    branches:
+    - master
+
+jobs:
+  pypi-job:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install twine
+        run: pip install twine
+      - name: Build package
+        run: python setup.py sdist
+      - name: Publish a Python distribution to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/README.md b/README.md
@@ -10,6 +10,17 @@ model = EfficientNet.from_pretrained('efficientnet-b0')
 
 ### Updates
 
+#### Update (April 2, 2021)
+
+The [EfficientNetV2 paper](https://arxiv.org/abs/2104.00298) has been released! I am working on implementing it as you read this :) 
+
+About EfficientNetV2:
+> EfficientNetV2 is a new family of convolutional networks that have faster training speed and better parameter efficiency than previous models. To develop this family of models, we use a combination of training-aware neural architecture search and scaling, to jointly optimize training speed and parameter efficiency. The models were searched from the search space enriched with new ops such as Fused-MBConv. 
+
+Here is a comparison: 
+> <img src="https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnetv2-image.png" width="100%" />
+
+
 #### Update (Aug 25, 2020)
 
 This update adds: 
diff --git a/efficientnet_pytorch/__init__.py b/efficientnet_pytorch/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.7.0"
+__version__ = "0.7.1"
 from .model import EfficientNet, VALID_MODELS
 from .utils import (
     GlobalParams,
diff --git a/efficientnet_pytorch/model.py b/efficientnet_pytorch/model.py
@@ -50,7 +50,7 @@ class MBConvBlock(nn.Module):
     def __init__(self, block_args, global_params, image_size=None):
         super().__init__()
         self._block_args = block_args
-        self._bn_mom = 1 - global_params.batch_norm_momentum # pytorch's difference from tensorflow
+        self._bn_mom = 1 - global_params.batch_norm_momentum  # pytorch's difference from tensorflow
         self._bn_eps = global_params.batch_norm_epsilon
         self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1)
         self.id_skip = block_args.id_skip  # whether to use skip connection and drop connect
@@ -152,9 +152,7 @@ class EfficientNet(nn.Module):
         [1] https://arxiv.org/abs/1905.11946 (EfficientNet)
 
     Example:
-        
-        
-        import torch
+        >>> import torch
         >>> from efficientnet_pytorch.model import EfficientNet
         >>> inputs = torch.rand(1, 3, 224, 224)
         >>> model = EfficientNet.from_pretrained('efficientnet-b0')
@@ -198,7 +196,7 @@ def __init__(self, blocks_args=None, global_params=None):
             # The first block needs to take care of stride and filter size increase.
             self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
             image_size = calculate_output_image_size(image_size, block_args.stride)
-            if block_args.num_repeat > 1: # modify block_args to keep same output size
+            if block_args.num_repeat > 1:  # modify block_args to keep same output size
                 block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
             for _ in range(block_args.num_repeat - 1):
                 self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
@@ -213,16 +211,18 @@ def __init__(self, blocks_args=None, global_params=None):
 
         # Final linear layer
         self._avg_pooling = nn.AdaptiveAvgPool2d(1)
-        self._dropout = nn.Dropout(self._global_params.dropout_rate)
-        self._fc = nn.Linear(out_channels, self._global_params.num_classes)
+        if self._global_params.include_top:
+            self._dropout = nn.Dropout(self._global_params.dropout_rate)
+            self._fc = nn.Linear(out_channels, self._global_params.num_classes)
+
+        # set activation to memory efficient swish by default
         self._swish = MemoryEfficientSwish()
 
     def set_swish(self, memory_efficient=True):
         """Sets swish function as memory efficient (for training) or standard (for export).
 
         Args:
             memory_efficient (bool): Whether to use memory-efficient version of swish.
-
         """
         self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
         for block in self._blocks:
@@ -261,17 +261,17 @@ def extract_endpoints(self, inputs):
         for idx, block in enumerate(self._blocks):
             drop_connect_rate = self._global_params.drop_connect_rate
             if drop_connect_rate:
-                drop_connect_rate *= float(idx) / len(self._blocks) # scale drop connect_rate
+                drop_connect_rate *= float(idx) / len(self._blocks)  # scale drop connect_rate
             x = block(x, drop_connect_rate=drop_connect_rate)
             if prev_x.size(2) > x.size(2):
-                endpoints['reduction_{}'.format(len(endpoints)+1)] = prev_x
+                endpoints['reduction_{}'.format(len(endpoints) + 1)] = prev_x
             elif idx == len(self._blocks) - 1:
-                endpoints['reduction_{}'.format(len(endpoints)+1)] = x
+                endpoints['reduction_{}'.format(len(endpoints) + 1)] = x
             prev_x = x
 
         # Head
         x = self._swish(self._bn1(self._conv_head(x)))
-        endpoints['reduction_{}'.format(len(endpoints)+1)] = x
+        endpoints['reduction_{}'.format(len(endpoints) + 1)] = x
 
         return endpoints
 
@@ -292,7 +292,7 @@ def extract_features(self, inputs):
         for idx, block in enumerate(self._blocks):
             drop_connect_rate = self._global_params.drop_connect_rate
             if drop_connect_rate:
-                drop_connect_rate *= float(idx) / len(self._blocks) # scale drop connect_rate
+                drop_connect_rate *= float(idx) / len(self._blocks)  # scale drop connect_rate
             x = block(x, drop_connect_rate=drop_connect_rate)
 
         # Head
@@ -322,7 +322,7 @@ def forward(self, inputs):
 
     @classmethod
     def from_name(cls, model_name, in_channels=3, **override_params):
-        """create an efficientnet model according to name.
+        """Create an efficientnet model according to name.
 
         Args:
             model_name (str): Name for efficientnet.
@@ -348,7 +348,7 @@ def from_name(cls, model_name, in_channels=3, **override_params):
     @classmethod
     def from_pretrained(cls, model_name, weights_path=None, advprop=False,
                         in_channels=3, num_classes=1000, **override_params):
-        """create an efficientnet model according to name.
+        """Create an efficientnet model according to name.
 
         Args:
             model_name (str): Name for efficientnet.
@@ -375,7 +375,8 @@ def from_pretrained(cls, model_name, weights_path=None, advprop=False,
             A pretrained efficientnet model.
         """
         model = cls.from_name(model_name, num_classes=num_classes, **override_params)
-        load_pretrained_weights(model, model_name, weights_path=weights_path, load_fc=(num_classes == 1000), advprop=advprop)
+        load_pretrained_weights(model, model_name, weights_path=weights_path,
+                                load_fc=(num_classes == 1000), advprop=advprop)
         model._change_in_channels(in_channels)
         return model
 
diff --git a/efficientnet_pytorch/utils.py b/efficientnet_pytorch/utils.py
@@ -17,7 +17,7 @@
 
 
 ################################################################################
-### Help functions for model architecture
+# Help functions for model architecture
 ################################################################################
 
 # GlobalParams and BlockArgs: Two namedtuples
@@ -50,11 +50,14 @@
 GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields)
 BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields)
 
-
-# An ordinary implementation of Swish function
-class Swish(nn.Module):
-    def forward(self, x):
-        return x * torch.sigmoid(x)
+# Swish activation function
+if hasattr(nn, 'SiLU'):
+    Swish = nn.SiLU
+else:
+    # For compatibility with old PyTorch versions
+    class Swish(nn.Module):
+        def forward(self, x):
+            return x * torch.sigmoid(x)
 
 
 # A memory-efficient implementation of Swish function
@@ -71,6 +74,7 @@ def backward(ctx, grad_output):
         sigmoid_i = torch.sigmoid(i)
         return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))
 
+
 class MemoryEfficientSwish(nn.Module):
     def forward(self, x):
         return SwishImplementation.apply(x)
@@ -96,10 +100,10 @@ def round_filters(filters, global_params):
     divisor = global_params.depth_divisor
     min_depth = global_params.min_depth
     filters *= multiplier
-    min_depth = min_depth or divisor # pay attention to this line when using min_depth
+    min_depth = min_depth or divisor  # pay attention to this line when using min_depth
     # follow the formula transferred from official TensorFlow implementation
     new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
-    if new_filters < 0.9 * filters: # prevent rounding by more than 10%
+    if new_filters < 0.9 * filters:  # prevent rounding by more than 10%
         new_filters += divisor
     return int(new_filters)
 
@@ -233,7 +237,7 @@ def forward(self, x):
         ih, iw = x.size()[-2:]
         kh, kw = self.weight.size()[-2:]
         sh, sw = self.stride
-        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) # change the output size according to stride ! ! !
+        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)  # change the output size according to stride ! ! !
         pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
         pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
         if pad_h > 0 or pad_w > 0:
@@ -311,6 +315,7 @@ def forward(self, x):
         return F.max_pool2d(x, self.kernel_size, self.stride, self.padding,
                             self.dilation, self.ceil_mode, self.return_indices)
 
+
 class MaxPool2dStaticSamePadding(nn.MaxPool2d):
     """2D MaxPooling like TensorFlow's 'SAME' mode, with the given input image size.
        The padding module is calculated in construction function, then used in forward.
@@ -343,7 +348,7 @@ def forward(self, x):
 
 
 ################################################################################
-### Helper functions for loading model params
+# Helper functions for loading model params
 ################################################################################
 
 # BlockDecoder: A Class for encoding and decoding BlockArgs
@@ -576,7 +581,7 @@ def get_model_params(model_name, override_params):
 # TODO: add the pretrained weights url map of 'efficientnet-l2'
 
 
-def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False):
+def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False, verbose=True):
     """Loads pretrained weights from weights path or download using url.
 
     Args:
@@ -607,4 +612,5 @@ def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True,
             ['_fc.weight', '_fc.bias']), 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys)
     assert not ret.unexpected_keys, 'Missing keys when loading pretrained weights: {}'.format(ret.unexpected_keys)
 
-    print('Loaded pretrained weights for {}'.format(model_name))
+    if verbose:
+        print('Loaded pretrained weights for {}'.format(model_name))
diff --git a/examples/imagenet/main.py b/examples/imagenet/main.py
@@ -434,7 +434,7 @@ def accuracy(output, target, topk=(1,)):
 
         res = []
         for k in topk:
-            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
+            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
             res.append(correct_k.mul_(100.0 / batch_size))
         return res
 
diff --git a/setup.py b/setup.py
@@ -18,7 +18,7 @@
 EMAIL = 'lmelaskyriazi@college.harvard.edu'
 AUTHOR = 'Luke'
 REQUIRES_PYTHON = '>=3.5.0'
-VERSION = '0.7.0'
+VERSION = '0.7.1'
 
 # What packages are required for this module to be executed?
 REQUIRED = [

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-__version__ = "0.7.0"`
	`1`	`+__version__ = "0.7.1"`
`2`	`2`	`from .model import EfficientNet, VALID_MODELS`
`3`	`3`	`from .utils import (`
`4`	`4`	`GlobalParams,`