RFC: DenseNet rewrite for correctness #241
@@ -1,20 +1,32 @@
-"""
-    dense_bottleneck(inplanes, outplanes; expansion=4)
-
-Create a Densenet bottleneck layer
-([reference](https://arxiv.org/abs/1608.06993)).
-
-# Arguments
-
-  - `inplanes`: number of input feature maps
-  - `outplanes`: number of output feature maps on bottleneck branch
-    (and scaling factor for inner feature maps; see ref)
-"""
-function dense_bottleneck(inplanes::Int, outplanes::Int; expansion::Int = 4)
-    return SkipConnection(Chain(conv_norm((1, 1), inplanes, expansion * outplanes;
-                                          revnorm = true)...,
-                                conv_norm((3, 3), expansion * outplanes, outplanes;
-                                          pad = 1, revnorm = true)...), cat_channels)
-end
+function dense_bottleneck(inplanes::Integer, growth_rate::Integer, bn_size::Integer,
+                          dropout_prob)
+    return Chain(cat_channels,
+                 conv_norm((1, 1), inplanes, bn_size * growth_rate;
+                           revnorm = true)...,
+                 conv_norm((3, 3), bn_size * growth_rate, growth_rate;
+                           pad = 1, revnorm = true)...,
+                 Dropout(dropout_prob))
+end
+
+function dense_block(inplanes::Integer, num_layers::Integer, bn_size::Integer,
+                     growth_rate::Integer, dropout_prob)
+    layers = [dense_bottleneck(inplanes + (i - 1) * growth_rate, growth_rate, bn_size,
+                               dropout_prob) for i in 1:num_layers]
Comment on lines +13 to +14:

Reviewer: Can't you take this vector and build the nested structure from it?

Author: I'm afraid I can't quite immediately see that implementation 😅 Could you give me an example?
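Editor's note: the thread leaves the example open. Below is a minimal sketch of one such construction (an editorial assumption, not code from the PR): fold the vector of bottlenecks into chained `SkipConnection`s, so the dense connectivity is expressed structurally rather than with an explicit accumulator. It assumes each bottleneck accepts a plain array input.

```julia
# Sketch (editor's assumption): each SkipConnection(layer, cat_channels)
# concatenates a layer's output with its input along the channel dimension,
# which is exactly the dense-block recurrence x <- cat_channels(x, layer(x)).
function dense_block(inplanes::Integer, num_layers::Integer, bn_size::Integer,
                     growth_rate::Integer, dropout_prob)
    layers = [dense_bottleneck(inplanes + (i - 1) * growth_rate, growth_rate,
                               bn_size, dropout_prob) for i in 1:num_layers]
    return Chain([SkipConnection(layer, cat_channels) for layer in layers]...)
end
```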
+    return DenseBlock(layers)
+end
+
+struct DenseBlock
+    layers::Any
+end
+@functor DenseBlock
+
+function (m::DenseBlock)(x)
+    input = [x]
+    for layer in m.layers
+        x = layer(input)
+        input = vcat(input, [x])
+    end
+    return cat_channels(input...)
+end
Comment on lines
+23
to
30
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is where I think the biggest change is, and where I think the code could use the most input. Is this the best way of doing this sequence of operations? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe function (m::DenseBlock)(x)
input = (x,)
for layer in m.layers
x = layer(input)
input = (input..., x)
end
return cat_channels(input)
end to bypass the Zygote There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Even better, we can remove function (m::DenseBlock)(x)
input = x
for layer in m.layers
x = layer(input)
input = cat_channels(input, x)
end
return input
end |
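Editor's note: a quick shape trace of the eager-concatenation variant, with stand-in `Conv` layers (an editorial sketch, not code from the PR; `cat_channels` is assumed to match `src/utilities.jl`). It shows why no final concatenation is needed: the running `input` already is the concatenation.

```julia
using Flux

cat_channels(xy...) = cat(xy...; dims = Val(3))  # assumption: as in utilities.jl

growth_rate = 32
# Stand-ins for dense bottlenecks: each maps the running width to growth_rate.
layers = [Conv((3, 3), 64 + (i - 1) * growth_rate => growth_rate; pad = 1)
          for i in 1:3]

let input = rand(Float32, 8, 8, 64, 1)
    for layer in layers
        input = cat_channels(input, layer(input))  # widen by growth_rate channels
    end
    size(input)  # (8, 8, 160, 1): 64 + 3 * 32
end
```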

 """
@@ -28,126 +40,54 @@ Create a DenseNet transition sequence
   - `inplanes`: number of input feature maps
   - `outplanes`: number of output feature maps
 """
-function transition(inplanes::Int, outplanes::Int)
+function transition(inplanes::Integer, outplanes::Integer)

Reviewer: Just a general note: in practice, allowing an abstract integer doesn't add any value compared to just restricting to `Int`.
     return Chain(conv_norm((1, 1), inplanes, outplanes; revnorm = true)...,
                  MeanPool((2, 2)))
 end
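Editor's note: a quick shape check of what a transition does (an editorial sketch with a stand-in for the `conv_norm` helper, which with `revnorm = true` places the norm before the conv): it scales the channel count by `reduction` and halves the spatial resolution.

```julia
using Flux

# Stand-in for transition(256, 128): norm, 1x1 conv, then 2x2 mean pooling.
t = Chain(BatchNorm(256, relu), Conv((1, 1), 256 => 128), MeanPool((2, 2)))

size(t(rand(Float32, 56, 56, 256, 1)))  # (28, 28, 128, 1)
```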
""" | ||
dense_block(inplanes, growth_rates) | ||
|
||
Create a sequence of DenseNet bottlenecks increasing | ||
the number of output feature maps by `growth_rates` with each block | ||
([reference](https://arxiv.org/abs/1608.06993)). | ||
|
||
# Arguments | ||
|
||
- `inplanes`: number of input feature maps to the full sequence | ||
- `growth_rates`: the growth (additive) rates of output feature maps | ||
after each block (a vector of `k`s from the ref) | ||
""" | ||
function dense_block(inplanes::Int, growth_rates) | ||
return [dense_bottleneck(i, o) | ||
for (i, o) in zip(inplanes .+ cumsum([0, growth_rates[1:(end - 1)]...]), | ||
growth_rates)] | ||
end | ||
|
||
""" | ||
densenet(inplanes, growth_rates; reduction = 0.5, dropout_prob = nothing, | ||
inchannels = 3, nclasses = 1000) | ||
|
||
Create a DenseNet model | ||
([reference](https://arxiv.org/abs/1608.06993)). | ||
|
||
# Arguments | ||
|
||
- `inplanes`: the number of input feature maps to the first dense block | ||
- `growth_rates`: the growth rates of output feature maps within each | ||
[`dense_block`](@ref) (a vector of vectors) | ||
- `reduction`: the factor by which the number of feature maps is scaled across each transition | ||
- `dropout_prob`: the dropout probability for the classifier head. Set to `nothing` to disable dropout. | ||
- `nclasses`: the number of output classes | ||
""" | ||
function build_densenet(inplanes::Int, growth_rates; reduction = 0.5, | ||
dropout_prob = nothing, | ||
inchannels::Int = 3, nclasses::Int = 1000) | ||
function build_densenet(growth_rate::Integer, inplanes::Integer, | ||
block_config::AbstractVector{<:Integer}; | ||
bn_size::Integer = 4, reduction = 0.5, dropout_prob = 0.0, | ||
inchannels::Integer = 3, nclasses::Integer = 1000) | ||
     layers = []
     append!(layers,
             conv_norm((7, 7), inchannels, inplanes; stride = 2, pad = (3, 3)))
     push!(layers, MaxPool((3, 3); stride = 2, pad = (1, 1)))
-    outplanes = 0
-    for (i, rates) in enumerate(growth_rates)
-        outplanes = inplanes + sum(rates)
-        append!(layers, dense_block(inplanes, rates))
-        (i != length(growth_rates)) &&
-            push!(layers, transition(outplanes, floor(Int, outplanes * reduction)))
-        inplanes = floor(Int, outplanes * reduction)
+    nfeatures = inplanes
+    for (i, num_layers) in enumerate(block_config)
+        push!(layers,
+              dense_block(nfeatures, num_layers, bn_size, growth_rate, dropout_prob))
+        nfeatures += num_layers * growth_rate
+        if (i != length(block_config))
+            push!(layers, transition(nfeatures, floor(Int, nfeatures * reduction)))
+            nfeatures = floor(Int, nfeatures * reduction)
+        end
     end
-    push!(layers, BatchNorm(outplanes, relu))
-    return Chain(Chain(layers...), create_classifier(outplanes, nclasses; dropout_prob))
+    push!(layers, BatchNorm(nfeatures, relu))
+    return Chain(Chain(layers...), create_classifier(nfeatures, nclasses; dropout_prob))
 end
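Editor's note: a worked trace of the `nfeatures` bookkeeping for the DenseNet-121 configuration (growth_rate = 32, inplanes = 64, block_config = [6, 12, 24, 16], reduction = 0.5); the values follow directly from the loop above.

```julia
let nfeatures = 64                      # stem output width (inplanes)
    for (i, num_layers) in enumerate([6, 12, 24, 16])
        nfeatures += num_layers * 32    # each dense block adds num_layers * k
        if i != 4
            nfeatures = floor(Int, nfeatures * 0.5)  # transition halves the width
        end
    end
    nfeatures  # 1024: width of the final BatchNorm and the classifier input
end
# Trace: 64 -> 256 -> 128 -> 512 -> 256 -> 1024 -> 512 -> 1024
```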
-"""
-    densenet(nblocks::AbstractVector{Int}; growth_rate = 32,
-             reduction = 0.5, dropout_prob = nothing, inchannels = 3,
-             nclasses = 1000)
-
-Create a DenseNet model
-([reference](https://arxiv.org/abs/1608.06993)).
-
-# Arguments
-
-  - `nblocks`: number of dense blocks between transitions
-  - `growth_rate`: the output feature map growth probability of dense blocks (i.e. `k` in the ref)
-  - `reduction`: the factor by which the number of feature maps is scaled across each transition
-  - `dropout_prob`: the dropout probability for the classifier head. Set to `nothing` to disable dropout
-  - `inchannels`: the number of input channels
-  - `nclasses`: the number of output classes
-"""
-function densenet(nblocks::AbstractVector{Int}; growth_rate::Int = 32,
-                  reduction = 0.5, dropout_prob = nothing, inchannels::Int = 3,
-                  nclasses::Int = 1000)
-    return build_densenet(2 * growth_rate, [fill(growth_rate, n) for n in nblocks];
-                          reduction, dropout_prob, inchannels, nclasses)
+function densenet(block_config::AbstractVector{<:Integer}; growth_rate::Integer = 32,
+                  inplanes::Integer = 2 * growth_rate, dropout_prob = 0.0,
+                  inchannels::Integer = 3, nclasses::Integer = 1000)
+    return build_densenet(growth_rate, inplanes, block_config;
+                          dropout_prob, inchannels, nclasses)
 end
 const DENSENET_CONFIGS = Dict(121 => [6, 12, 24, 16],
                               161 => [6, 12, 36, 24],
                               169 => [6, 12, 32, 32],
                               201 => [6, 12, 48, 32])
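Editor's note: the dictionary keys follow the paper's depth-based naming, which counts learned layers. Each bottleneck contributes two convolutions, so DenseNet-121 = 1 stem convolution + 2 × (6 + 12 + 24 + 16) bottleneck convolutions + 3 transition convolutions + 1 classifier layer = 121; the same arithmetic recovers 161, 169, and 201 from the other configurations.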
 """
     DenseNet(config::Int; pretrain = false, growth_rate = 32,
              reduction = 0.5, inchannels = 3, nclasses = 1000)

 Create a DenseNet model with specified configuration. Currently supported values are (121, 161, 169, 201)
 ([reference](https://arxiv.org/abs/1608.06993)).

 # Arguments

   - `config`: the configuration of the model
   - `pretrain`: whether to load the model with pre-trained weights for ImageNet.
   - `growth_rate`: the output feature map growth probability of dense blocks (i.e. `k` in the ref)
   - `reduction`: the factor by which the number of feature maps is scaled across each transition
   - `inchannels`: the number of input channels
   - `nclasses`: the number of output classes

 !!! warning

     `DenseNet` does not currently support pretrained weights.

 See also [`Metalhead.densenet`](@ref).
 """
 struct DenseNet
     layers::Any
 end
 @functor DenseNet
-function DenseNet(config::Int; pretrain::Bool = false, growth_rate::Int = 32,
-                  reduction = 0.5, inchannels::Int = 3, nclasses::Int = 1000)
+function DenseNet(config::Integer; pretrain::Bool = false, growth_rate::Integer = 32,
+                  inchannels::Integer = 3, nclasses::Integer = 1000)
     _checkconfig(config, keys(DENSENET_CONFIGS))
-    layers = densenet(DENSENET_CONFIGS[config]; growth_rate, reduction, inchannels,
-                      nclasses)
+    layers = densenet(DENSENET_CONFIGS[config]; growth_rate, inchannels, nclasses)
     model = DenseNet(layers)
     if pretrain
         artifact_name = string("densenet", config)
@@ -164,5 +104,5 @@ classifier(m::DenseNet) = m.layers[2]
 ## HACK TO LOAD OLD WEIGHTS, remove when we have a new artifact
 function Flux.loadmodel!(m::DenseNet, src)
     Flux.loadmodel!(m.layers[1], src.layers[1])
-    Flux.loadmodel!(m.layers[2], src.layers[2])
+    return Flux.loadmodel!(m.layers[2], src.layers[2])
 end
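Editor's note: a usage sketch of the rewritten API (editorial assumptions: the file is loaded as part of Metalhead with Flux available, and weights are random, per the pretrained-weights warning above).

```julia
model = DenseNet(121)                     # DenseNet-121 with random weights
y = model(rand(Float32, 224, 224, 3, 1))  # 1000-way logits, size (1000, 1)

# Or the functional form, for custom configurations:
layers = densenet([6, 12, 24, 16]; growth_rate = 32, nclasses = 100)
```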
Comment (apparently on the `cat_channels` at the head of the `dense_bottleneck` chain):

Reviewer: If the input to this chain is a vector of arrays, then according to
https://github.com/FluxML/Metalhead.jl/blob/010d4bc72989e4392d2ce0b89e72b8d640927dd8/src/utilities.jl#L37
shouldn't we have …?
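Editor's note: the suggested replacement did not survive extraction. A sketch of the likely point (assuming `cat_channels` is the variadic helper at the linked line): a `Chain` passes its input as a single argument, so a `Vector` of arrays arrives unsplatted and is not concatenated along channels; splatting inside an anonymous function would restore the intended behaviour.

```julia
cat_channels(xy...) = cat(xy...; dims = Val(3))  # assumption: as in utilities.jl

a = rand(Float32, 4, 4, 8, 1)
b = rand(Float32, 4, 4, 16, 1)

cat_channels(a, b)                   # (4, 4, 24, 1): the intended channel concat
cat_channels([a, b])                 # one Vector argument: NOT a channel concat

splat_cat = x -> cat_channels(x...)  # hypothetical fix for the head of the chain
size(splat_cat([a, b]))              # (4, 4, 24, 1)
```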