When I try to use BatchNorm inside a custom layer, calculating the gradient throws an error:
MethodError: no method matching ∇conv_data(::Array{Float64,4}, ::Array{Float32,4}, ::Array{Float32,4}; stride=(1, 1), pad=(3, 3), dilation=(1, 1))
Closest candidates are:
∇conv_data(::AbstractArray, ::AbstractArray; size, pad, stride, dilation, flipkernel) at /home/dom/.julia/packages/NNlib/UpABH/src/conv.jl:74
∇conv_data(::A<:AbstractArray, !Matched::A<:AbstractArray, !Matched::A<:AbstractArray; kw...) where A<:AbstractArray at deprecated.jl:53
The layer looks like this:
struct StandardConv
conv::Conv
norm::BatchNorm
end
function StandardConv(in_size, channels::Pair; pad=1, args...)
return StandardConv(
Conv(in_size, channels, relu; pad=pad, args...),
BatchNorm(channels[2])
)
end
function (layer::StandardConv)(x)
x = layer.conv(x)
layer.norm(x)
end
@treelike StandardConv
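For reference, the forward pass of the layer runs on its own; the MethodError only shows up in the backward pass (∇conv_data). A quick check (variable names here are just for illustration):

layer = StandardConv((3,3), 1=>1)
out = layer(rand(Float32, 28, 28, 1, 1))   # forward pass works; only the gradient call further down fails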
The model:
input_shape = rand(28,28,1,1);
body = Chain(
StandardConv((3,3),1=>1),
x -> reshape(x,:,size(x,4)),
)
body_shape = size(body(input_shape),1)
Flux.reset!(body)
head = Chain(
Dense(body_shape,512),
Dense(512,10)
)
model = Chain(
body,
head
);
Flux.reset!(model);
The loss:
function loss(x,y)
y_pred = model(x) |> softmax
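    # note: Flux.logitcrossentropy expects raw logits (it applies softmax internally),
    # so the explicit softmax above is probably redundant (unrelated to the gradient error, though)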
Flux.logitcrossentropy(y_pred,y)
end
#dummy input
x = rand(28,28,1,10)
y = Flux.onehotbatch(collect(1:10),1:10)
θ = params(model);
grads = Tracker.gradient(() -> loss(x,y) ,θ)
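Possibly related: in the MethodError the first argument is an Array{Float64,4} while the other two are Array{Float32,4}; rand defaults to Float64, whereas the Conv weights are Float32. As an untested guess, converting the dummy input to Float32 might sidestep the missing method:

x32 = Float32.(x)   # hypothetical workaround, not verified
grads = Tracker.gradient(() -> loss(x32, y), θ)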
If you comment out layer.norm(x), the gradients work fine, so the problem seems to be triggered by the BatchNorm.
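That is, with the forward pass reduced to just the convolution:

function (layer::StandardConv)(x)
    x = layer.conv(x)
    # layer.norm(x)   # gradient computes without error when this line is commented out
end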