From eceaf3e47e81c7c9964931eb6e71f819bb9021ce Mon Sep 17 00:00:00 2001
From: Moksh Jain
Date: Tue, 5 Feb 2019 15:45:55 +0530
Subject: [PATCH] fix merge conflicts

---
 src/layers/conv.jl      |  2 +-
 src/layers/recurrent.jl |  8 ++++----
 src/layers/stateless.jl |  6 ++++++
 src/onehot.jl           | 23 ++++++++++++++++++++++-
 test/cuda/cuda.jl       |  7 +++++++
 5 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 2562989584..54817cd6e2 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -13,7 +13,7 @@ Standard convolutional layer. `size` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 
 Data should be stored in WHCN order. In other words, a 100×100 RGB image would
-be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
+be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array.
 
 Takes the keyword arguments `pad`, `stride` and `dilation`.
 """
diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl
index 40cd322a0a..4e23e9eec3 100644
--- a/src/layers/recurrent.jl
+++ b/src/layers/recurrent.jl
@@ -84,7 +84,7 @@ end
 
 RNNCell(in::Integer, out::Integer, σ = tanh; init = glorot_uniform) =
   RNNCell(σ, param(init(out, in)), param(init(out, out)),
-          param(zeros(out)), param(init(out)))
+          param(init(out)), param(zeros(out)))
 
 function (m::RNNCell)(h, x)
   σ, Wi, Wh, b = m.σ, m.Wi, m.Wh, m.b
@@ -122,8 +122,8 @@ end
 
 function LSTMCell(in::Integer, out::Integer;
                   init = glorot_uniform)
-  cell = LSTMCell(param(init(out*4, in)), param(init(out*4, out)), param(zeros(out*4)),
-                  param(init(out)), param(init(out)))
+  cell = LSTMCell(param(init(out*4, in)), param(init(out*4, out)), param(init(out*4)),
+                  param(zeros(out)), param(zeros(out)))
   cell.b.data[gate(out, 2)] .= 1
   return cell
 end
@@ -169,7 +169,7 @@ end
 
 GRUCell(in, out; init = glorot_uniform) =
   GRUCell(param(init(out*3, in)), param(init(out*3, out)),
-          param(zeros(out*3)), param(init(out)))
+          param(init(out*3)), param(zeros(out)))
 
 function (m::GRUCell)(h, x)
   b, o = m.b, size(h, 1)
diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl
index 0da3399c05..28722b3070 100644
--- a/src/layers/stateless.jl
+++ b/src/layers/stateless.jl
@@ -44,8 +44,14 @@ logitbinarycrossentropy(logŷ, y) = (1 - y)*logŷ - logσ(logŷ)
 
 Normalises x to mean 0 and standard deviation 1, across the dimensions given by dims. Defaults to normalising over columns.
 """
+<<<<<<< HEAD
 function normalise(x::AbstractArray, dims::Int=1)
   μ′ = mean(x, dims = dims)
   σ′ = std(x, dims = dims, mean = μ′)
+=======
+function normalise(x::AbstractVecOrMat)
+  μ′ = mean(x, dims = 1)
+  σ′ = std(x, dims = 1, mean = μ′, corrected=false)
+>>>>>>> 940b1e6dbfd95ed3f64daf0a75edae713d25e7c1
   return (x .- μ′) ./ σ′
 end
diff --git a/src/onehot.jl b/src/onehot.jl
index cd29f14e9d..c82dce237a 100644
--- a/src/onehot.jl
+++ b/src/onehot.jl
@@ -9,6 +9,8 @@ Base.size(xs::OneHotVector) = (Int64(xs.of),)
 
 Base.getindex(xs::OneHotVector, i::Integer) = i == xs.ix
 
+Base.getindex(xs::OneHotVector, ::Colon) = xs
+
 A::AbstractMatrix * b::OneHotVector = A[:, b.ix]
 
 struct OneHotMatrix{A<:AbstractVector{OneHotVector}} <: AbstractMatrix{Bool}
@@ -22,6 +24,21 @@ Base.getindex(xs::OneHotMatrix, i::Integer, j::Integer) = xs.data[j][i]
 Base.getindex(xs::OneHotMatrix, ::Colon, i::Integer) = xs.data[i]
 Base.getindex(xs::OneHotMatrix, ::Colon, i::AbstractArray) = OneHotMatrix(xs.height, xs.data[i])
 
+Base.getindex(xs::Flux.OneHotMatrix, j::Base.UnitRange, i::Int) = xs.data[i][j]
+
+Base.getindex(xs::OneHotMatrix, ::Colon, ::Colon) = xs
+
+# handle special case for when we want the entire column
+function Base.getindex(xs::Flux.OneHotMatrix{T}, ot::Union{Base.Slice, Base.OneTo}, i::Int) where {T<:AbstractArray}
+  res = similar(xs, size(xs, 1), 1)
+  if length(ot) == size(xs, 1)
+    res = xs[:,i]
+  else
+    res = xs[1:length(ot),i]
+  end
+  res
+end
+
 A::AbstractMatrix * B::OneHotMatrix = A[:, map(x->x.ix, B.data)]
 
 Base.hcat(x::OneHotVector, xs::OneHotVector...) = OneHotMatrix(length(x), [x, xs...])
@@ -54,13 +71,17 @@ end
 onehotbatch(ls, labels, unk...) =
   OneHotMatrix(length(labels), [onehot(l, labels, unk...) for l in ls])
 
+Base.argmax(xs::OneHotVector) = xs.ix
+
 onecold(y::AbstractVector, labels = 1:length(y)) = labels[Base.argmax(y)]
 
 onecold(y::AbstractMatrix, labels...) =
   dropdims(mapslices(y -> onecold(y, labels...), y, dims=1), dims=1)
 
+onecold(y::OneHotMatrix, labels...) = map(x -> onecold(x, labels...), y.data)
+
 function argmax(xs...)
-  Base.depwarn("`argmax(...) is deprecated, use `onecold(...)` instead.", :argmax)
+  Base.depwarn("`argmax(...)` is deprecated, use `onecold(...)` instead.", :argmax)
   return onecold(xs...)
 end
 
diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl
index f7a085031b..43340c746f 100644
--- a/test/cuda/cuda.jl
+++ b/test/cuda/cuda.jl
@@ -38,6 +38,13 @@ Flux.back!(sum(l))
 
 end
 
+@testset "onecold gpu" begin
+  x = zeros(Float32, 10, 3) |> gpu;
+  y = Flux.onehotbatch(ones(3), 1:10) |> gpu;
+  res = Flux.onecold(x) .== Flux.onecold(y)
+  @test res isa CuArray
+end
+
 if CuArrays.libcudnn != nothing
   @info "Testing Flux/CUDNN"
   include("cudnn.jl")