RFC: DenseNet rewrite for correctness #241
@@ -1,20 +1,32 @@
-"""
-    dense_bottleneck(inplanes, outplanes; expansion=4)
-
-Create a Densenet bottleneck layer
-([reference](https://arxiv.org/abs/1608.06993)).
-
-# Arguments
-
-  - `inplanes`: number of input feature maps
-  - `outplanes`: number of output feature maps on bottleneck branch
-    (and scaling factor for inner feature maps; see ref)
-"""
-function dense_bottleneck(inplanes::Int, outplanes::Int; expansion::Int = 4)
-    return SkipConnection(Chain(conv_norm((1, 1), inplanes, expansion * outplanes;
-                                          revnorm = true)...,
-                                conv_norm((3, 3), expansion * outplanes, outplanes;
-                                          pad = 1, revnorm = true)...), cat_channels)
-end
+function dense_bottleneck(inplanes::Integer, growth_rate::Integer, bn_size::Integer,
+                          dropout_prob)
+    return Chain(cat_channels,
+                 conv_norm((1, 1), inplanes, bn_size * growth_rate;
+                           revnorm = true)...,
+                 conv_norm((3, 3), bn_size * growth_rate, growth_rate;
+                           pad = 1, revnorm = true)...,
+                 Dropout(dropout_prob))
+end
+
+function dense_block(inplanes::Integer, num_layers::Integer, bn_size::Integer,
+                     growth_rate::Integer, dropout_prob)
+    layers = [dense_bottleneck(inplanes + (i - 1) * growth_rate, growth_rate, bn_size,
+                               dropout_prob) for i in 1:num_layers]
Comment on lines +13 to +14:

Reviewer: Can't you take this vector and build the nested structure from it?

Author: I'm afraid I can't quite immediately see that implementation 😅 Could you give me an example?
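Editor's note: the thread leaves the example open. Below is a minimal sketch of one such construction (an editorial assumption, not code from the PR): fold the vector of bottlenecks into chained `SkipConnection`s, so the dense connectivity is expressed structurally rather than with an explicit accumulator. It assumes each bottleneck accepts a plain array input.

```julia
# Sketch (editor's assumption): each SkipConnection(layer, cat_channels)
# concatenates a layer's output with its input along the channel dimension,
# which is exactly the dense-block recurrence x <- cat_channels(x, layer(x)).
function dense_block(inplanes::Integer, num_layers::Integer, bn_size::Integer,
                     growth_rate::Integer, dropout_prob)
    layers = [dense_bottleneck(inplanes + (i - 1) * growth_rate, growth_rate,
                               bn_size, dropout_prob) for i in 1:num_layers]
    return Chain([SkipConnection(layer, cat_channels) for layer in layers]...)
end
```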
+    return DenseBlock(layers)
+end
+
+struct DenseBlock
+    layers::Any
+end
+@functor DenseBlock
+
+function (m::DenseBlock)(x)
+    input = [x]
+    for layer in m.layers
+        x = layer(input)
+        input = vcat(input, [x])
+    end
+    return cat_channels(input...)
+end
Comment on lines
+23
to
30
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is where I think the biggest change is, and where I think the code could use the most input. Is this the best way of doing this sequence of operations? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe function (m::DenseBlock)(x)
input = (x,)
for layer in m.layers
x = layer(input)
input = (input..., x)
end
return cat_channels(input)
end to bypass the Zygote There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Even better, we can remove function (m::DenseBlock)(x)
input = x
for layer in m.layers
x = layer(input)
input = cat_channels(input, x)
end
return input
end |
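Editor's note: a quick shape trace of the eager-concatenation variant, with stand-in `Conv` layers (an editorial sketch, not code from the PR; `cat_channels` is assumed to match `src/utilities.jl`). It shows why no final concatenation is needed: the running `input` already is the concatenation.

```julia
using Flux

cat_channels(xy...) = cat(xy...; dims = Val(3))  # assumption: as in utilities.jl

growth_rate = 32
# Stand-ins for dense bottlenecks: each maps the running width to growth_rate.
layers = [Conv((3, 3), 64 + (i - 1) * growth_rate => growth_rate; pad = 1)
          for i in 1:3]

let input = rand(Float32, 8, 8, 64, 1)
    for layer in layers
        input = cat_channels(input, layer(input))  # widen by growth_rate channels
    end
    size(input)  # (8, 8, 160, 1): 64 + 3 * 32
end
```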

 """
@@ -28,126 +40,54 @@ Create a DenseNet transition sequence
   - `inplanes`: number of input feature maps
   - `outplanes`: number of output feature maps
 """
-function transition(inplanes::Int, outplanes::Int)
+function transition(inplanes::Integer, outplanes::Integer)

Reviewer: Just a general note: in practice, allowing an abstract integer doesn't add any value compared to just restricting to `Int`.
     return Chain(conv_norm((1, 1), inplanes, outplanes; revnorm = true)...,
                  MeanPool((2, 2)))
 end
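Editor's note: a quick shape check of what a transition does (an editorial sketch with a stand-in for the `conv_norm` helper, which with `revnorm = true` places the norm before the conv): it scales the channel count by `reduction` and halves the spatial resolution.

```julia
using Flux

# Stand-in for transition(256, 128): norm, 1x1 conv, then 2x2 mean pooling.
t = Chain(BatchNorm(256, relu), Conv((1, 1), 256 => 128), MeanPool((2, 2)))

size(t(rand(Float32, 56, 56, 256, 1)))  # (28, 28, 128, 1)
```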
""" | ||
dense_block(inplanes, growth_rates) | ||
|
||
Create a sequence of DenseNet bottlenecks increasing | ||
the number of output feature maps by `growth_rates` with each block | ||
([reference](https://arxiv.org/abs/1608.06993)). | ||
|
||
# Arguments | ||
|
||
- `inplanes`: number of input feature maps to the full sequence | ||
- `growth_rates`: the growth (additive) rates of output feature maps | ||
after each block (a vector of `k`s from the ref) | ||
""" | ||
function dense_block(inplanes::Int, growth_rates) | ||
return [dense_bottleneck(i, o) | ||
for (i, o) in zip(inplanes .+ cumsum([0, growth_rates[1:(end - 1)]...]), | ||
growth_rates)] | ||
end | ||
|
||
""" | ||
densenet(inplanes, growth_rates; reduction = 0.5, dropout_prob = nothing, | ||
inchannels = 3, nclasses = 1000) | ||
|
||
Create a DenseNet model | ||
([reference](https://arxiv.org/abs/1608.06993)). | ||
|
||
# Arguments | ||
|
||
- `inplanes`: the number of input feature maps to the first dense block | ||
- `growth_rates`: the growth rates of output feature maps within each | ||
[`dense_block`](@ref) (a vector of vectors) | ||
- `reduction`: the factor by which the number of feature maps is scaled across each transition | ||
- `dropout_prob`: the dropout probability for the classifier head. Set to `nothing` to disable dropout. | ||
- `nclasses`: the number of output classes | ||
""" | ||
function build_densenet(inplanes::Int, growth_rates; reduction = 0.5, | ||
dropout_prob = nothing, | ||
inchannels::Int = 3, nclasses::Int = 1000) | ||
function build_densenet(growth_rate::Integer, inplanes::Integer, | ||
block_config::AbstractVector{<:Integer}; | ||
bn_size::Integer = 4, reduction = 0.5, dropout_prob = 0.0, | ||
inchannels::Integer = 3, nclasses::Integer = 1000) | ||
     layers = []
     append!(layers,
             conv_norm((7, 7), inchannels, inplanes; stride = 2, pad = (3, 3)))
     push!(layers, MaxPool((3, 3); stride = 2, pad = (1, 1)))
-    outplanes = 0
-    for (i, rates) in enumerate(growth_rates)
-        outplanes = inplanes + sum(rates)
-        append!(layers, dense_block(inplanes, rates))
-        (i != length(growth_rates)) &&
-            push!(layers, transition(outplanes, floor(Int, outplanes * reduction)))
-        inplanes = floor(Int, outplanes * reduction)
+    nfeatures = inplanes
+    for (i, num_layers) in enumerate(block_config)
+        push!(layers,
+              dense_block(nfeatures, num_layers, bn_size, growth_rate, dropout_prob))
+        nfeatures += num_layers * growth_rate
+        if (i != length(block_config))
+            push!(layers, transition(nfeatures, floor(Int, nfeatures * reduction)))
+            nfeatures = floor(Int, nfeatures * reduction)
+        end
     end
-    push!(layers, BatchNorm(outplanes, relu))
-    return Chain(Chain(layers...), create_classifier(outplanes, nclasses; dropout_prob))
+    push!(layers, BatchNorm(nfeatures, relu))
+    return Chain(Chain(layers...), create_classifier(nfeatures, nclasses; dropout_prob))
 end
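Editor's note: a worked trace of the `nfeatures` bookkeeping for the DenseNet-121 configuration (growth_rate = 32, inplanes = 64, block_config = [6, 12, 24, 16], reduction = 0.5); the values follow directly from the loop above.

```julia
let nfeatures = 64                      # stem output width (inplanes)
    for (i, num_layers) in enumerate([6, 12, 24, 16])
        nfeatures += num_layers * 32    # each dense block adds num_layers * k
        if i != 4
            nfeatures = floor(Int, nfeatures * 0.5)  # transition halves the width
        end
    end
    nfeatures  # 1024: width of the final BatchNorm and the classifier input
end
# Trace: 64 -> 256 -> 128 -> 512 -> 256 -> 1024 -> 512 -> 1024
```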
-"""
-    densenet(nblocks::AbstractVector{Int}; growth_rate = 32,
-             reduction = 0.5, dropout_prob = nothing, inchannels = 3,
-             nclasses = 1000)
-
-Create a DenseNet model
-([reference](https://arxiv.org/abs/1608.06993)).
-
-# Arguments
-
-  - `nblocks`: number of dense blocks between transitions
-  - `growth_rate`: the output feature map growth probability of dense blocks (i.e. `k` in the ref)
-  - `reduction`: the factor by which the number of feature maps is scaled across each transition
-  - `dropout_prob`: the dropout probability for the classifier head. Set to `nothing` to disable dropout
-  - `inchannels`: the number of input channels
-  - `nclasses`: the number of output classes
-"""
-function densenet(nblocks::AbstractVector{Int}; growth_rate::Int = 32,
-                  reduction = 0.5, dropout_prob = nothing, inchannels::Int = 3,
-                  nclasses::Int = 1000)
-    return build_densenet(2 * growth_rate, [fill(growth_rate, n) for n in nblocks];
-                          reduction, dropout_prob, inchannels, nclasses)
+function densenet(block_config::AbstractVector{<:Integer}; growth_rate::Integer = 32,
+                  inplanes::Integer = 2 * growth_rate, dropout_prob = 0.0,
+                  inchannels::Integer = 3, nclasses::Integer = 1000)
+    return build_densenet(growth_rate, inplanes, block_config;
+                          dropout_prob, inchannels, nclasses)
 end
 const DENSENET_CONFIGS = Dict(121 => [6, 12, 24, 16],
                               161 => [6, 12, 36, 24],
                               169 => [6, 12, 32, 32],
                               201 => [6, 12, 48, 32])
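Editor's note: the dictionary keys follow the paper's depth-based naming, which counts learned layers. Each bottleneck contributes two convolutions, so DenseNet-121 = 1 stem convolution + 2 × (6 + 12 + 24 + 16) bottleneck convolutions + 3 transition convolutions + 1 classifier layer = 121; the same arithmetic recovers 161, 169, and 201 from the other configurations.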
 """
     DenseNet(config::Int; pretrain = false, growth_rate = 32,
              reduction = 0.5, inchannels = 3, nclasses = 1000)

 Create a DenseNet model with specified configuration. Currently supported values are (121, 161, 169, 201)
 ([reference](https://arxiv.org/abs/1608.06993)).

 # Arguments

   - `config`: the configuration of the model
   - `pretrain`: whether to load the model with pre-trained weights for ImageNet.
   - `growth_rate`: the output feature map growth probability of dense blocks (i.e. `k` in the ref)
   - `reduction`: the factor by which the number of feature maps is scaled across each transition
   - `inchannels`: the number of input channels
   - `nclasses`: the number of output classes

 !!! warning

     `DenseNet` does not currently support pretrained weights.

 See also [`Metalhead.densenet`](@ref).
 """
 struct DenseNet
     layers::Any
 end
 @functor DenseNet
-function DenseNet(config::Int; pretrain::Bool = false, growth_rate::Int = 32,
-                  reduction = 0.5, inchannels::Int = 3, nclasses::Int = 1000)
+function DenseNet(config::Integer; pretrain::Bool = false, growth_rate::Integer = 32,
+                  inchannels::Integer = 3, nclasses::Integer = 1000)
     _checkconfig(config, keys(DENSENET_CONFIGS))
-    layers = densenet(DENSENET_CONFIGS[config]; growth_rate, reduction, inchannels,
-                      nclasses)
+    layers = densenet(DENSENET_CONFIGS[config]; growth_rate, inchannels, nclasses)
     model = DenseNet(layers)
     if pretrain
         artifact_name = string("densenet", config)
@@ -164,5 +104,5 @@ classifier(m::DenseNet) = m.layers[2]
 ## HACK TO LOAD OLD WEIGHTS, remove when we have a new artifact
 function Flux.loadmodel!(m::DenseNet, src)
     Flux.loadmodel!(m.layers[1], src.layers[1])
-    Flux.loadmodel!(m.layers[2], src.layers[2])
+    return Flux.loadmodel!(m.layers[2], src.layers[2])
 end
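Editor's note: a usage sketch of the rewritten API (editorial assumptions: the file is loaded as part of Metalhead with Flux available, and weights are random, per the pretrained-weights warning above).

```julia
model = DenseNet(121)                     # DenseNet-121 with random weights
y = model(rand(Float32, 224, 224, 3, 1))  # 1000-way logits, size (1000, 1)

# Or the functional form, for custom configurations:
layers = densenet([6, 12, 24, 16]; growth_rate = 32, nclasses = 100)
```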
Comment (apparently on the `cat_channels` at the head of the `dense_bottleneck` chain):

Reviewer: If the input to this chain is a vector of arrays, then according to
https://github.com/FluxML/Metalhead.jl/blob/010d4bc72989e4392d2ce0b89e72b8d640927dd8/src/utilities.jl#L37
shouldn't we have …?
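Editor's note: the suggested replacement did not survive extraction. A sketch of the likely point (assuming `cat_channels` is the variadic helper at the linked line): a `Chain` passes its input as a single argument, so a `Vector` of arrays arrives unsplatted and is not concatenated along channels; splatting inside an anonymous function would restore the intended behaviour.

```julia
cat_channels(xy...) = cat(xy...; dims = Val(3))  # assumption: as in utilities.jl

a = rand(Float32, 4, 4, 8, 1)
b = rand(Float32, 4, 4, 16, 1)

cat_channels(a, b)                   # (4, 4, 24, 1): the intended channel concat
cat_channels([a, b])                 # one Vector argument: NOT a channel concat

splat_cat = x -> cat_channels(x...)  # hypothetical fix for the head of the chain
size(splat_cat([a, b]))              # (4, 4, 24, 1)
```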