Added the wrapper Bidirectional for RNN layers #1790
base: master
Changes from all commits: fe14252, 01daa36, dd6b70e, 808cabc, 398e533, bb41190, c107916, 0ec5d82, 18b406a
@@ -430,3 +430,106 @@ Recur(m::GRUv3Cell) = Recur(m, m.state0)
@adjoint function Broadcast.broadcasted(f::Recur, args...)
  Zygote.∇map(__context__, f, args...)
end

""" | ||||||||||
Bidirectional{A,B} | ||||||||||
|
||||||||||
A wrapper layer that allows the use of [bidirectional](https://ieeexplore.ieee.org/document/650093) layers. It contains two parts that are Flux layers: `forward` and `backward` where | ||||||||||
the forward layer weights are concatenated with the reversed order of the backward layer weights. | ||||||||||
|
||||||||||
It is intended to be used with recurrent layers such as `LSTM`, `GRU` or `RNN` to benefit from the sequential information that recurrent | ||||||||||
layers have, but it will not raise an error if used with a different layer such as `Dense`, as long as the layer is compatible with the concatenation function `vcat`. | ||||||||||
|
||||||||||
For flexibility, it is possible to use the contructor `Bidirectional(rnn, in::Int, out::Int, a...; ka...)` by passing the input and output dimensions | ||||||||||
together with the desired recurrent layers (one of `LSTM`, `GRU`, or `RNN`). Check the examples below for more details. | ||||||||||
|
||||||||||
# Examples | ||||||||||
|
||||||||||
1. Using the flexible constructor to create a bidirectional LSTM layer (BiLSTM):
```jldoctest
julia> BiLSTM = Bidirectional(LSTM, 3, 5)
Bidirectional(
  Recur(
    LSTMCell(3, 5),                     # 190 parameters
  ),
  Recur(
    LSTMCell(3, 5),                     # 190 parameters
  ),
)         # Total: 10 trainable arrays, 380 parameters,
          # plus 4 non-trainable, 20 parameters, summarysize 2.141 KiB.
```
2. Checking the output size after running the bidirectional layer on an input vector. The output dimension is twice the output dimension of the wrapped layers (here 2 × 5 = 10):
```jldoctest
julia> Bidirectional(LSTM, 3, 5)(rand(Float32, 3)) |> size
(10,)
```
3. It is possible to use the bidirectional layer inside the `Chain` container:
```
julia> model = Chain(Embedding(10000, 200), Bidirectional(LSTM, 200, 128), Dense(256, 5), softmax)
Chain(
  Embedding(10000, 200),                # 2_000_000 parameters
  Bidirectional(
    Recur(
      LSTMCell(200, 128),               # 168_704 parameters
    ),
    Recur(
      LSTMCell(200, 128),               # 168_704 parameters
    ),
  ),
  Dense(256, 5),                        # 1_285 parameters
  NNlib.softmax,
)         # Total: 13 trainable arrays, 2_338_693 parameters,
          # plus 4 non-trainable, 512 parameters, summarysize 8.922 MiB.
```
4. It is also possible to use the default constructor, passing both layers explicitly:
```jldoctest
julia> BiGRU = Bidirectional(GRU(3, 5), GRU(3, 5))
Bidirectional(
  Recur(
    GRUCell(3, 5),                      # 140 parameters
  ),
  Recur(
    GRUCell(3, 5),                      # 140 parameters
  ),
)         # Total: 8 trainable arrays, 280 parameters,
          # plus 2 non-trainable, 10 parameters, summarysize 1.562 KiB.
```
5. Other arguments accepted by the recurrent layers can be passed through as well:
```jldoctest
julia> model = Bidirectional(RNN(3, 5, tanh; init=glorot_normal), LSTM(3, 5; initb=zeros32, init_state=zeros32))
Bidirectional(
  Recur(
    RNNCell(3, 5, tanh),                # 50 parameters
  ),
  Recur(
    LSTMCell(3, 5),                     # 190 parameters
  ),
)         # Total: 9 trainable arrays, 240 parameters,
          # plus 3 non-trainable, 15 parameters, summarysize 1.500 KiB.
```
""" | ||||||||||
struct Bidirectional{A,B} | ||||||||||
forward::A | ||||||||||
backward::B | ||||||||||
end | ||||||||||

# Constructor that creates a Bidirectional using the same kind of layer for forward and backward.
# `in` needs to be declared explicitly to avoid dispatch conflicts with the default constructor.
Bidirectional(rnn, in::Integer, args...; ka...) = Bidirectional(rnn(in, args...; ka...), rnn(in, args...; ka...))
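As a quick illustration of what this constructor does (a sketch, not part of the diff; it assumes the `Wi` field name used by Flux 0.12's recurrent cells), note that it calls `rnn(in, args...; ka...)` twice, so the two directions get independent weights:

```julia
using Flux

# Hypothetical check, not part of this PR: the flexible constructor builds the
# forward and backward layers from two separate calls, so no weights are shared.
b = Bidirectional(GRU, 3, 5)
b.forward.cell.Wi === b.backward.cell.Wi   # false: each direction has its own parameters
```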

# Concatenate the forward output with the output of the backward layer run on the reversed input, flipped back
function (m::Bidirectional)(x::Union{AbstractVecOrMat{T},OneHotArray}) where {T}
  return vcat(m.forward(x), reverse(m.backward(reverse(x; dims=1)); dims=1))
end

Review thread on this method:

- Sorry, just found this. When applying Flux RNNs on dense sequences, the temporal dim is actually the last one. See Flux.jl/src/layers/recurrent.jl line 83 in dd6b70e and Flux.jl/src/layers/recurrent.jl line 27 in dd6b70e.

- I'm quite confused now, because I was expecting the variable […] `X_onehot = [OneHotMatrix(x, vocab_size) for x in X]`, `y_onehot = [OneHotMatrix(x, num_tags) for x in y]`, where I am not sure how to proceed.

- https://fluxml.ai/Flux.jl/stable/models/recurrence/#Working-with-sequences has most of the nitty-gritty details about working with batched sequences for RNNs. In short, the only supported formats are […] Since this definition of Bidirectional takes a contiguous array instead of a vector of arrays, […]

- But the RNN layers do not literally accept a vector of matrices. At the link this is iterated through by hand. Should method 2 of […]

- Yes, exactly.

- And applied to […]

- The signatures specifically prohibit that, so it should fail. Did you have a specific edge case in mind that isn't caught by the above?

- No, I just wonder about the mismatch between what LSTM accepts and what this accepts. Prohibiting things which don't make sense is good; you want to know soon.

- Right, the RNN layers can work with single timesteps, but […]

- Thank you for the great conversation here and the suggestions. I just tested, and `Bidirectional(LSTM, 3, 5)(randn(Float32, 3, 7)) |> size` gives `(10, 7)`. But about the format, I am still confused on how to preprocess the text data so that it would end in […] I usually follow: […] Also, by checking these formats I discovered that I should probably use […]
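The thread above turns on how Flux expects sequences to be laid out. Below is a minimal sketch (not part of this PR) of the vector-of-timesteps format described in the linked recurrence docs, assuming the Flux 0.12-era API; by contrast, the `Bidirectional` method defined above takes a single contiguous `AbstractVecOrMat`, which is what makes the author's `(10, 7)` test work.

```julia
using Flux

# Sketch of the batched-sequence format from
# https://fluxml.ai/Flux.jl/stable/models/recurrence/#Working-with-sequences:
# the sequence is a Vector of timesteps, each a (features, batch) matrix.
seq = [rand(Float32, 3, 16) for _ in 1:7]   # 7 timesteps, 3 features, batch of 16

rnn = LSTM(3, 5)
Flux.reset!(rnn)                            # clear the hidden state before a new sequence
outs = [rnn(x) for x in seq]                # stateful: apply once per timestep, in order
size(outs[end])                             # (5, 16)
```

Note that when a recurrent cell receives a matrix in a single call, as in `Bidirectional(LSTM, 3, 5)(randn(Float32, 3, 7))`, the columns are treated as a batch within one timestep, so reversing along `dims=1` flips features rather than time, which appears to be the reviewer's concern.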

@functor Bidirectional
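Because `@functor` is what lets Flux traverse the wrapper, a simple sanity check (not part of the diff) is that parameter collection reaches both directions; the expected count below is taken from the "10 trainable arrays" reported in the first docstring example.

```julia
using Flux

# Hypothetical check, not from this PR: `@functor Bidirectional` exposes the
# `forward` and `backward` fields, so `Flux.params`, `gpu`, etc. traverse both.
b = Bidirectional(LSTM, 3, 5)
length(Flux.params(b))   # expected 10, matching the "10 trainable arrays" shown above
```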

function Base.show(io::IO, b::Bidirectional)
  print(io, "Bidirectional(")
  show(io, b.forward.cell)
  print(io, ", ")
  show(io, b.backward.cell)
  print(io, ")")
end
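For reference, this two-argument `show` prints only the two cells, so the compact form would look roughly like the sketch below (assuming Flux 0.12's compact cell printing; this is not a doctest from the PR):

```julia
julia> print(Bidirectional(LSTM, 3, 5))
Bidirectional(LSTMCell(3, 5), LSTMCell(3, 5))
```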

Review comment: Once we settle on the constructor, this line should be the various constructors, not the type definition.