FluxML · MikeInnes · Feb 6, 2019 · Sep 5, 2018 · Sep 8, 2018 · Oct 19, 2018
diff --git a/src/Flux.jl b/src/Flux.jl
@@ -6,7 +6,7 @@ using Base: tail
 using MacroTools, Juno, Requires, Reexport, Statistics, Random
 using MacroTools: @forward
 
-export Chain, Dense, RNN, LSTM, GRU, Conv, MaxPool, MeanPool,
+export Chain, Dense, RNN, LSTM, GRU, Conv, ConvTranspose, MaxPool, MeanPool,
        DepthwiseConv, Dropout, LayerNorm, BatchNorm,
        params, mapleaves, cpu, gpu
 

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
@@ -1,4 +1,4 @@
-using NNlib: conv, depthwiseconv
+using NNlib: conv, ∇conv_data, depthwiseconv
 
 @generated sub2(::Val{N}) where N = :(Val($(N-2)))
 
@@ -57,6 +57,47 @@ end
 (a::Conv{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
   a(T.(x))
 
+"""
+    ConvTranspose(size, in=>out)
+    ConvTranspose(size, in=>out, relu)
+
+Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`.
+`in` and `out` specify the number of input and output channels respectively.
+The optional third argument is the activation function, which is broadcast over
+the layer output (`identity` by default).
+
+Data should be stored in WHCN order. In other words, a 100×100 RGB image would
+be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
+
+Takes the keyword arguments `pad`, `stride` and `dilation` (defaulting to `0`,
+`1` and `1` respectively). When constructing from `size` and `in=>out`, the
+keyword `init` (default `initn`) selects the function used to create the weights.
+"""
+struct ConvTranspose{N,F,A,V}
+  σ::F                      # activation, broadcast over the output
+  weight::A                 # kernel array handed to `∇conv_data`
+  bias::V                   # bias vector, reshaped and added to every position
+  stride::NTuple{N,Int}     # one entry per spatial dimension
+  pad::NTuple{N,Int}        # one entry per spatial dimension
+  dilation::NTuple{N,Int}   # one entry per spatial dimension
+end
+
+# Build a layer from an existing weight array and bias vector. The weight has
+# `N` dimensions in total, so `stride`, `pad` and `dilation` are each expanded
+# against `sub2(Val(N))`, i.e. the `N-2` leading (non-channel) dimensions.
+function ConvTranspose(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
+                       stride = 1, pad = 0, dilation = 1) where {T,N}
+  spatial = sub2(Val(N))
+  return ConvTranspose(σ, w, b,
+                       expand(spatial, stride),
+                       expand(spatial, pad),
+                       expand(spatial, dilation))
+end
+
+# Build a layer from a kernel size and an `in=>out` channel pair. The weight is
+# created by `init` with shape `(k..., out, in)` and the bias is a zero vector
+# with one entry per output channel; both are wrapped with `param`.
+function ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
+                       init = initn, stride = 1, pad = 0, dilation = 1) where N
+  nin, nout = ch
+  weight = param(init(k..., nout, nin))
+  bias = param(zeros(nout))
+  return ConvTranspose(weight, bias, σ;
+                       stride = stride, pad = pad, dilation = dilation)
+end
+
+@treelike ConvTranspose
+
+# Apply the layer: transposed convolution of `x` with the stored weights, plus
+# the bias, followed by the activation broadcast over the result.
+function (c::ConvTranspose)(x)
+  # TODO: unbatched input is not handled yet; the intended shortcut is
+  # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1)))
+  # Reshape the bias to (1, ..., 1, channels, 1) so it broadcasts over every
+  # spatial position and every batch element.
+  bias = reshape(c.bias, ntuple(_ -> 1, length(c.stride))..., :, 1)
+  y = ∇conv_data(x, c.weight; stride = c.stride, pad = c.pad, dilation = c.dilation)
+  return c.σ.(y .+ bias)
+end
+
+# Print a compact, constructor-like summary such as `ConvTranspose((2, 2), 3=>5, relu)`.
+# The previous version stopped after the kernel size, leaving the parenthesis
+# unbalanced and omitting the channel pair and the activation.
+function Base.show(io::IO, l::ConvTranspose)
+  nd = ndims(l.weight)
+  print(io, "ConvTranspose(", size(l.weight)[1:nd-2])
+  # The `size, in=>out` constructor lays the weight out as (size..., out, in),
+  # so the input channel count is the last dimension.
+  print(io, ", ", size(l.weight, nd), "=>", size(l.weight, nd-1))
+  # Only mention the activation when it is not the default.
+  l.σ == identity || print(io, ", ", l.σ)
+  print(io, ")")
+end
+
+
 """
     DepthwiseConv(size, in)
     DepthwiseConv(size, in=>mul)
@@ -77,6 +118,7 @@ struct DepthwiseConv{N,F,A,V}
   bias::V
   stride::NTuple{N,Int}
   pad::NTuple{N,Int}
+  dilation::NTuple{N,Int}
 end
 
 DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;

diff --git a/src/tracker/lib/array.jl b/src/tracker/lib/array.jl
@@ -356,7 +356,7 @@ x::TrackedVector  * y::TrackedVector  = track(*, x, y)
 # NNlib
 
 using NNlib
-import NNlib: softmax, ∇softmax, logsoftmax, ∇logsoftmax, conv, depthwiseconv, maxpool, meanpool
+import NNlib: softmax, ∇softmax, logsoftmax, ∇logsoftmax, conv, ∇conv_data, depthwiseconv, maxpool, meanpool
 
 softmax(xs::TrackedArray) = track(softmax, xs)
 
@@ -383,8 +383,18 @@ conv(x::TrackedArray,  w::AbstractArray; kw...) = track(conv, x, w; kw...)
+# Gradient rule for `conv`. The forward value is computed on the untracked data;
+# the pullback maps the output sensitivity `Δ` to a tuple of
+#   (gradient w.r.t. `x`, via `∇conv_data` on `Δ` and `w`, sized like `x`;
+#    gradient w.r.t. `w`, via `∇conv_filter` on `Δ` and `x`, sized like `w`).
+# NOTE(review): the result is wrapped in `nobacksies(:conv, ...)`, presumably to
+# flag that nested differentiation is unsupported here — confirm.
 @grad conv(x, w; kw...) =
   conv(data(x), data(w); kw...),
     Δ -> nobacksies(:conv,
-      (NNlib.∇conv_data(data.((Δ, x, w))...; kw...),
-       NNlib.∇conv_filter(data.((Δ, x, w))...; kw...)))
+      (NNlib.∇conv_data(data.((Δ, w))...; size=size(x), kw...),
+       NNlib.∇conv_filter(data.((Δ, x))...; size=size(w), kw...)))
+
+# Route `∇conv_data` through `track` whenever at least one of the two array
+# arguments is a `TrackedArray`; keyword arguments are forwarded unchanged.
+∇conv_data(x::TrackedArray,  w::TrackedArray;  kw...) = track(∇conv_data, x, w; kw...)
+∇conv_data(x::AbstractArray, w::TrackedArray;  kw...) = track(∇conv_data, x, w; kw...)
+∇conv_data(x::TrackedArray,  w::AbstractArray; kw...) = track(∇conv_data, x, w; kw...)
+
+# Gradient rule for `∇conv_data`. The forward value is computed on the untracked
+# data; the pullback maps the output sensitivity `Δ` to a tuple of
+#   (gradient w.r.t. `x`, via `conv` on `Δ` and `w`, sized like `x`;
+#    gradient w.r.t. `w`, via `∇conv_filter` on `x` and `Δ`, sized like `w`).
+# The `nobacksies` label names this operation rather than `:conv`, so that any
+# message derived from it points at the right function.
+@grad ∇conv_data(x, w; kw...) =
+  ∇conv_data(data(x), data(w); kw...),
+    Δ -> nobacksies(:∇conv_data,
+      (NNlib.conv(data.((Δ, w))...; size=size(x), kw...),
+       NNlib.∇conv_filter(data.((x, Δ))...; size=size(w), kw...)))
 
 maxpool(x::TrackedArray, k; kw...) = track(maxpool, x, k; kw...)
 

diff --git a/test/tracker.jl b/test/tracker.jl
@@ -1,7 +1,11 @@
 using Flux
 using Flux.Tracker, Test, NNlib
+<<<<<<< HEAD
 using Flux.Tracker: TrackedReal, gradcheck, grad, checkpoint
-using NNlib: conv, depthwiseconv
+=======
+using Flux.Tracker: TrackedReal, gradcheck, grad, derivative, checkpoint
+>>>>>>> a657c287d0590fdd9e49bb68c35bf96febe45e6d
+using NNlib: conv, ∇conv_data, depthwiseconv
 using Printf: @sprintf
 using LinearAlgebra: diagm, dot, LowerTriangular, norm
 using Statistics: mean, std
@@ -182,12 +186,20 @@ end
   2y + x
 end
 
-@test gradtest(conv, rand(10, 3, 2), randn(Float64,2, 3, 2))
-@test gradtest(conv, rand(10, 10, 3, 2), randn(Float64,2, 2, 3, 2))
-@test gradtest(conv, rand(10, 10, 10, 3, 2), randn(Float64,2, 2, 2, 3, 2))
+@test gradtest(conv, rand(10, 3, 2), randn(Float64, 2, 3, 2))
+@test gradtest(conv, rand(10, 10, 3, 2), randn(Float64, 2, 2, 3, 2))
+@test gradtest(conv, rand(10, 10, 10, 3, 2), randn(Float64, 2, 2, 2, 3, 2))
+
+@test gradtest(∇conv_data, rand(10, 3, 2), randn(Float64, 2, 2, 3))
+@test gradtest(∇conv_data, rand(10, 10, 3, 2), randn(Float64, 2, 2, 2, 3))
+@test gradtest(∇conv_data, rand(10, 10, 10, 3, 2), randn(Float64, 2, 2, 2, 2, 3))
 
 @test gradtest(depthwiseconv, rand(10,10,3,2), randn(2, 2, 2, 3))
 
 @test gradtest(x -> maxpool(x, (2,2)), rand(10, 10, 3, 2))
 @test gradtest(x -> maxpool(x, (2,2,2)), rand(10, 10, 10, 3, 2))