From fc123d6279677de37ede997bb7decff68ac791a2 Mon Sep 17 00:00:00 2001
From: DrChainsaw
Date: Sun, 20 Oct 2019 13:35:41 +0200
Subject: [PATCH 1/5] Add SamePad for conv layers

---
 src/Flux.jl         |  2 +-
 src/layers/conv.jl  | 38 ++++++++++++++++++++++++++++++++++----
 test/layers/conv.jl | 17 +++++++++++++++++
 3 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/src/Flux.jl b/src/Flux.jl
index 95bdcd3230..c6fda5dc7e 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -9,7 +9,7 @@ using MacroTools: @forward
 using Zygote: Params, @adjoint, gradient, pullback
 export gradient
 
-export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose, MaxPool, MeanPool,
+export Chain, Dense, Maxout, RNN, LSTM, GRU, SamePad, Conv, CrossCor, ConvTranspose, MaxPool, MeanPool,
        DepthwiseConv, Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, GroupNorm,
        SkipConnection, params, fmap, cpu, gpu, f32, f64
 
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 519f129fbc..4a7f916c04 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -2,6 +2,28 @@ using NNlib: conv, ∇conv_data, depthwiseconv
 
 expand(N, i::Tuple) = i
 expand(N, i::Integer) = ntuple(_ -> i, N)
+
+"""
+    SamePad
+
+Padding for convolutional layers will be calculated so that outputshape == inputshape when stride = 1.
+
+For stride > 1 the output shape depends on the type of convolution layer.
+"""
+struct SamePad end
+
+calc_padding(pad, k::NTuple{N,T}, dilation, stride) where {T,N} = expand(Val(2*(N-2)), pad)
+function calc_padding(::SamePad, k, dilation, stride)
+  # Formula from Relationship 14 in http://deeplearning.net/software/theano_versions/dev/tutorial/conv_arithmetic.html
+
+  # Effective kernel size, including dilation
+  k_eff = @. k + (k - 1) * (dilation - 1)
+  # How much total padding needs to be applied?
+  pad_amt = @. k_eff - 1
+  # In case the amount of padding is odd we need to apply different amounts to each side.
+  return Tuple(mapfoldl(i -> [ceil(Int, i/2), i ÷ 2], vcat, pad_amt))
+end
+
 """
     Conv(size, in=>out)
     Conv(size, in=>out, relu)
@@ -22,6 +44,8 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
 
 Takes the keyword arguments `pad`, `stride` and `dilation`.
+
+Use `pad=SamePad()` to apply padding so that outputsize == ceil(inputsize / stride).
 """
 struct Conv{N,M,F,A,V}
   σ::F
@@ -35,8 +59,8 @@ end
 function Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
               stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return Conv(σ, w, b, stride, pad, dilation)
 end
 
@@ -79,6 +103,8 @@ Data should be stored in WHCN order.
 In other words, a 100×100 RGB image would be a `100×100×3` array,
 and a batch of 50 would be a `100×100×3×50` array.
 
 Takes the keyword arguments `pad`, `stride` and `dilation`.
+
+Use `pad=SamePad()` to apply padding so that outputsize == stride * inputsize - stride + 1.
 """
 struct ConvTranspose{N,M,F,A,V}
   σ::F
@@ -92,8 +118,8 @@ end
 function ConvTranspose(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return ConvTranspose(σ, w, b, stride, pad, dilation)
 end
 
@@ -149,6 +175,8 @@ Data should be stored in WHCN order.
 In other words, a 100×100 RGB image would be a `100×100×3` array,
 and a batch of 50 would be a `100×100×3×50` array.
 
 Takes the keyword arguments `pad`, `stride` and `dilation`.
+
+Use `pad=SamePad()` to apply padding so that outputsize == ceil(inputsize / stride).
 """
 struct DepthwiseConv{N,M,F,A,V}
   σ::F
@@ -162,8 +190,8 @@ end
 function DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return DepthwiseConv(σ, w, b, stride, pad, dilation)
 end
 
@@ -221,6 +249,8 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
 
 Takes the keyword arguments `pad`, `stride` and `dilation`.
+
+Use `pad=SamePad()` to apply padding so that outputsize == ceil(inputsize / stride).
 """
 struct CrossCor{N,M,F,A,V}
   σ::F
@@ -234,8 +264,8 @@ end
 function CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
                   stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return CrossCor(σ, w, b, stride, pad, dilation)
 end
 
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index aa3925f1f6..d65d9fee9f 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -102,3 +102,20 @@ end
     true
   end
 end
+
+@testset "$ltype SamePad kernelsize $k" for ltype in (Conv, ConvTranspose, DepthwiseConv, CrossCor), k in ( (1,), (2,), (3,), (4,5), (6,7,8))
+  data = ones(Float32, (k .+ 3)..., 1,1)
+  l = ltype(k, 1=>1, pad=SamePad())
+  @test size(l(data)) == size(data)
+
+  l = ltype(k, 1=>1, pad=SamePad(), dilation = k .÷ 2)
+  @test size(l(data)) == size(data)
+
+  stride = 3
+  l = ltype(k, 1=>1, pad=SamePad(), stride = stride)
+  if ltype == ConvTranspose
+    @test size(l(data))[1:end-2] == stride .* size(data)[1:end-2] .- stride .- 1
+  else
+    @test size(l(data))[1:end-2] == ceil.(Int, size(data)[1:end-2] ./ stride)
+  end
+end
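A quick usage sketch of what this patch enables (illustration only, not part of the patch; it assumes a Flux build with the patch applied, and the layer sizes are arbitrary). With the default `stride = 1`, `SamePad` keeps the spatial dimensions unchanged:

julia> using Flux

julia> x = rand(Float32, 100, 100, 3, 1);  # WHCN: one 100×100 RGB image

julia> c = Conv((3, 3), 3 => 7, relu, pad = SamePad());

julia> size(c(x))  # spatial size preserved, only the channel count changes
(100, 100, 7, 1)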
From 411ce5dbd873c455163a2a310336eb252745adf5 Mon Sep 17 00:00:00 2001
From: DrChainsaw
Date: Sun, 20 Oct 2019 13:43:39 +0200
Subject: [PATCH 2/5] Add SamePad for pooling layers

---
 src/layers/conv.jl  | 9 ++++++---
 test/layers/conv.jl | 9 ++++++++-
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 4a7f916c04..2b465d6511 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -308,6 +308,8 @@ end
 Max pooling layer. `k` stands for the size of the window for each dimension of the input.
 
 Takes the keyword arguments `pad` and `stride`.
+
+Use `pad=SamePad()` to apply padding so that outputsize == ceil(inputsize / stride).
 """
 struct MaxPool{N,M}
   k::NTuple{N,Int}
@@ -317,8 +319,7 @@ end
 
 function MaxPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N
   stride = expand(Val(N), stride)
-  pad = expand(Val(2*N), pad)
-
+  pad = calc_padding(pad, k, 1, stride)
   return MaxPool(k, pad, stride)
 end
 
@@ -337,6 +338,8 @@ end
 Mean pooling layer. `k` stands for the size of the window for each dimension of the input.
 
 Takes the keyword arguments `pad` and `stride`.
+
+Use `pad=SamePad()` to apply padding so that outputsize == ceil(inputsize / stride).
 """
 struct MeanPool{N,M}
   k::NTuple{N,Int}
@@ -346,7 +349,7 @@ end
 
 function MeanPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N
   stride = expand(Val(N), stride)
-  pad = expand(Val(2*N), pad)
+  pad = calc_padding(pad, k, 1, stride)
   return MeanPool(k, pad, stride)
 end
 
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index d65d9fee9f..750986601e 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -114,8 +114,15 @@ end
   stride = 3
   l = ltype(k, 1=>1, pad=SamePad(), stride = stride)
   if ltype == ConvTranspose
-    @test size(l(data))[1:end-2] == stride .* size(data)[1:end-2] .- stride .- 1
+    @test size(l(data))[1:end-2] == stride .* size(data)[1:end-2] .- stride .+ 1
   else
     @test size(l(data))[1:end-2] == ceil.(Int, size(data)[1:end-2] ./ stride)
   end
 end
+
+@testset "$ltype SamePad windowsize $k" for ltype in (MeanPool, MaxPool), k in ( (1,), (2,), (3,), (4,5), (6,7,8))
+  data = ones(Float32, (k .+ 3)..., 1,1)
+
+  l = ltype(k, pad=SamePad())
+  @test size(l(data))[1:end-2] == ceil.(Int, size(data)[1:end-2] ./ k)
+end
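A corresponding sketch for the pooling case (again illustration only, assuming the patched Flux; the shapes are arbitrary). Pooling layers default to `stride = k`, so `SamePad` here yields `outputsize == ceil(inputsize / k)`:

julia> x = rand(Float32, 7, 7, 3, 1);

julia> p = MaxPool((2, 2), pad = SamePad());

julia> size(p(x))  # ceil(7 / 2) == 4 in each spatial dimension
(4, 4, 3, 1)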
From 530d4edb679eb684c3a03503ef66dd15c762166a Mon Sep 17 00:00:00 2001
From: DrChainsaw
Date: Sun, 20 Oct 2019 15:15:30 +0200
Subject: [PATCH 3/5] Fix for reading comprehension error (dim is not always 2 * (N-2))

Fix for ambiguous method sig

---
 src/layers/conv.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 2b465d6511..58a913da6b 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -12,8 +12,8 @@ For stride > 1 the output shape depends on the type of convolution layer.
 """
 struct SamePad end
 
-calc_padding(pad, k::NTuple{N,T}, dilation, stride) where {T,N} = expand(Val(2*(N-2)), pad)
-function calc_padding(::SamePad, k, dilation, stride)
+calc_padding(pad, k::NTuple{N,T}, dilation, stride) where {T,N} = expand(Val(2*N), pad)
+function calc_padding(::SamePad, k::NTuple{N,T}, dilation, stride) where {N,T}
   # Formula from Relationship 14 in http://deeplearning.net/software/theano_versions/dev/tutorial/conv_arithmetic.html
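To make the corrected helper concrete, here is a sketch of the padding arithmetic (illustration only; `calc_padding` is an internal helper, not exported, and this assumes the patch series so far is applied). When the total padding `k_eff - 1` is odd, the two sides of that dimension receive different amounts:

julia> Flux.calc_padding(SamePad(), (3, 3), 1, 1)  # odd kernel: symmetric padding
(1, 1, 1, 1)

julia> Flux.calc_padding(SamePad(), (4,), 1, 1)    # even kernel: one side gets the extra element
(2, 1)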
From deff98812a09c4f902bc8705fa9fb4fda5a68a8d Mon Sep 17 00:00:00 2001
From: DrChainsaw
Date: Fri, 24 Apr 2020 21:59:02 +0200
Subject: [PATCH 4/5] Add v0.11.0 NEWS entry for the same padding option

---
 NEWS.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 4023c7f25b..898bf6713d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,6 @@
+# v0.11.0
+* Add option for [same padding](https://github.com/FluxML/Flux.jl/pull/901) to conv and pooling layers by setting `pad=SamePad()`.
+
 # v0.10.0
 * The default AD engine has switched from [Tracker to Zygote.jl](https://github.com/FluxML/Flux.jl/pull/669)
   - The dependency on Tracker.jl has been removed.

From 4e4f6d9d1f7ed8b30c6d552b817d50d7450608a8 Mon Sep 17 00:00:00 2001
From: DrChainsaw
Date: Fri, 24 Apr 2020 22:07:57 +0200
Subject: [PATCH 5/5] Change next version entry to 0.10.5

---
 NEWS.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 898bf6713d..460a9e5b14 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,5 @@
-# v0.11.0
-* Add option for [same padding](https://github.com/FluxML/Flux.jl/pull/901) to conv and pooling layers by setting `pad=SamePad()`.
+# v0.10.5
+* Add option for [same padding](https://github.com/FluxML/Flux.jl/pull/901) to conv and pooling layers by setting `pad=SamePad()`.
 
 # v0.10.0
 * The default AD engine has switched from [Tracker to Zygote.jl](https://github.com/FluxML/Flux.jl/pull/669)
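Finally, a sketch of the transposed-convolution relationship encoded by the corrected test in patch 2 (illustration only, assuming the patched Flux; the sizes are arbitrary): with `stride > 1`, `SamePad` on a `ConvTranspose` gives `outputsize == stride * inputsize - stride + 1`:

julia> x = rand(Float32, 4, 4, 1, 1);

julia> ct = ConvTranspose((3, 3), 1 => 1, pad = SamePad(), stride = 3);

julia> size(ct(x))  # 3 * 4 - 3 + 1 == 10
(10, 10, 1, 1)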