This repository was archived by the owner on Mar 30, 2022. It is now read-only.

Cannot use custom Differentiable structure in a Layer input #582

Closed

@laclouis5

Description

I'm trying to implement a UNet-like segmentation network; here is the complete definition:

/// A differentiable pair of tensors, used to pass two inputs to a single-input layer.
struct TensorPair<Scalar: TensorFlowFloatingPoint>: AdditiveArithmetic, Differentiable {
    var lhs: Tensor<Scalar>
    var rhs: Tensor<Scalar>

    init(_ lhs: Tensor<Scalar>, _ rhs: Tensor<Scalar>) {
        self.lhs = lhs
        self.rhs = rhs
    }
}

struct ReLU<Scalar: TensorFlowFloatingPoint>: ParameterlessLayer {
    typealias TangentVector = EmptyTangentVector

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        relu(input)
    }
}

struct DoubleConv<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>
    var relu = ReLU<Scalar>()

    init(_ inChannels: Int, _ outChannels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, inChannels, outChannels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: outChannels)
        conv2 = Conv2D(filterShape: (3, 3, outChannels, outChannels), padding: .same, useBias: false)
        bn2 = BatchNorm(featureCount: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: conv1, bn1, relu, conv2, bn2, relu)
    }
}

struct ResidualBlock<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>
    var relu = ReLU<Scalar>()

    init(channels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, channels, channels), padding: .same, useBias: false)
        conv2 = Conv2D(filterShape: (3, 3, channels, channels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: channels)
        bn2 = BatchNorm(featureCount: channels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: conv1, bn1, relu, conv2, bn2, relu) + input
    }
}

struct Down<Scalar: TensorFlowFloatingPoint>: Layer {
    var downsample: Conv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        downsample = Conv2D(filterShape: (3, 3, inChannels, outChannels), strides: (2, 2), padding: .same)
        conv = ResidualBlock(channels: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: downsample, conv)
    }
}

struct Up<Scalar: TensorFlowFloatingPoint>: Layer {
    var upsample: TransposedConv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        upsample = TransposedConv2D(filterShape: (2, 2, outChannels, inChannels), strides: (2, 2))
        conv = ResidualBlock(channels: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: TensorPair<Scalar>) -> Tensor<Scalar> {
        conv(upsample(input.lhs) + input.rhs)
    }
}

struct Head<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv: Conv2D<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        self.conv = Conv2D(filterShape: (1, 1, inChannels, outChannels), padding: .same)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        conv(input)
    }
}

struct UNet<Scalar: TensorFlowFloatingPoint>: Layer {
    var adapter: DoubleConv<Scalar>
    var down1, down2, down3, down4: Down<Scalar>
    var up1, up2, up3, up4: Up<Scalar>
    var head: Head<Scalar>

    init(_ inChannels: Int = 3, _ outChannels: Int = 1) {
        adapter = DoubleConv(inChannels, 64)
        down1 = Down(64, 128)
        down2 = Down(128, 256)
        down3 = Down(256, 512)
        down4 = Down(512, 1024)
        up1 = Up(1024, 512)
        up2 = Up(512, 256)
        up3 = Up(256, 128)
        up4 = Up(128, 64)
        head = Head(64, outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        let d0 = adapter(input)

        // Encoder: progressively downsample.
        let d1 = down1(d0)
        let d2 = down2(d1)
        let d3 = down3(d2)
        let d4 = down4(d3)

        // Decoder: upsample and fuse each stage with its skip connection.
        let u1 = up1(TensorPair(d4, d3))
        let u2 = up2(TensorPair(u1, d2))
        let u3 = up3(TensorPair(u2, d1))
        let u4 = up4(TensorPair(u3, d0))

        let output = head(u4)

        return output
    }
}

The problem is that the Up layer must accept two inputs (the input and the shortcut connection), but callAsFunction() accepts only a single argument, so I created a Differentiable struct to bundle the two tensors. However, this leads to an error:

error: <Cell 11>:32:22: error: expression is not differentiable
        let u4 = up4(TensorPair(u3, d0))
                     ^

<Cell 11>:32:22: note: cannot differentiate functions that have not been marked '@differentiable' and that are defined in other files
        let u4 = up4(TensorPair(u3, d0))

The same error occurs for the other three Up layers.
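
One plausible reading of the note (an assumption, not a fix confirmed in this thread): in a notebook, every cell compiles as a separate file, so if TensorPair is defined in a different cell than UNet, the only cross-file function being differentiated here is its initializer, which is not marked @differentiable. Annotating the initializer might resolve the error; a minimal sketch:

import TensorFlow

struct TensorPair<Scalar: TensorFlowFloatingPoint>: AdditiveArithmetic, Differentiable {
    var lhs: Tensor<Scalar>
    var rhs: Tensor<Scalar>

    // Sketch, not a confirmed fix: @differentiable makes the initializer's
    // derivative visible across file (cell) boundaries.
    @differentiable
    init(_ lhs: Tensor<Scalar>, _ rhs: Tensor<Scalar>) {
        self.lhs = lhs
        self.rhs = rhs
    }
}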

  • Any idea on how to solve this?
  • Is there a better alternative for writing layers that accept more than one input? (One possibility is sketched after this list.)
  • Will the Layer protocol support a callAsFunction() requirement with multiple inputs in the future (when generics allow that, of course), i.e. callAsFunction(_ input1: Input1, _ input2: Input2, ...) or callAsFunction(_ inputs: Input...)? I also think that the TensorPair struct should be part of the Swift for TensorFlow API, or that tuples should automatically conform to Differentiable when possible.
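
On the multi-input question, one workaround worth sketching (an assumption on my part, not an official pattern): only the outer model needs Layer conformance for training, so Up could conform to Differentiable alone and expose a two-argument @differentiable method. Differentiation requires only Differentiable, and UNet can still conform to Layer; depending on the optimizer's generic constraints, the synthesized Up.TangentVector may need additional conformances.

import TensorFlow

struct Up<Scalar: TensorFlowFloatingPoint>: Differentiable {
    var upsample: TransposedConv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        upsample = TransposedConv2D(filterShape: (2, 2, outChannels, inChannels), strides: (2, 2))
        conv = ResidualBlock(channels: outChannels)
    }

    // Two tensor arguments, no wrapper struct needed.
    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>, skip: Tensor<Scalar>) -> Tensor<Scalar> {
        conv(upsample(input) + skip)
    }
}

UNet's forward pass would then read let u1 = up1(d4, skip: d3) and so on, with no TensorPair involved.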
