This repository was archived by the owner on Mar 30, 2022. It is now read-only.

Cannot use custom Differentiable structure in a Layer input #582

Closed

@laclouis5

Description

I'm trying to implement a UNet-like segmentation network; here is the complete definition:

/// A differentiable pair of tensors, used to pass two inputs to a single-input layer.
struct TensorPair<Scalar: TensorFlowFloatingPoint>: AdditiveArithmetic, Differentiable {
    var lhs: Tensor<Scalar>
    var rhs: Tensor<Scalar>

    init(_ lhs: Tensor<Scalar>, _ rhs: Tensor<Scalar>) {
        self.lhs = lhs
        self.rhs = rhs
    }
}

struct ReLU<Scalar: TensorFlowFloatingPoint>: ParameterlessLayer {
    typealias TangentVector = EmptyTangentVector

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        relu(input)
    }
}

struct DoubleConv<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>
    var relu = ReLU<Scalar>()

    init(_ inChannels: Int, _ outChannels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, inChannels, outChannels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: outChannels)
        conv2 = Conv2D(filterShape: (3, 3, outChannels, outChannels), padding: .same, useBias: false)
        bn2 = BatchNorm(featureCount: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: conv1, bn1, relu, conv2, bn2, relu)
    }
}

struct ResidualBlock<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>
    var relu = ReLU<Scalar>()

    init(channels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, channels, channels), padding: .same, useBias: false)
        conv2 = Conv2D(filterShape: (3, 3, channels, channels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: channels)
        bn2 = BatchNorm(featureCount: channels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: conv1, bn1, relu, conv2, bn2, relu) + input
    }
}

struct Down<Scalar: TensorFlowFloatingPoint>: Layer {
    var downsample: Conv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        downsample = Conv2D(filterShape: (3, 3, inChannels, outChannels), strides: (2, 2), padding: .same)
        conv = ResidualBlock(channels: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: downsample, conv)
    }
}

struct Up<Scalar: TensorFlowFloatingPoint>: Layer {
    var upsample: TransposedConv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        upsample = TransposedConv2D(filterShape: (2, 2, outChannels, inChannels), strides: (2, 2))
        conv = ResidualBlock(channels: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: TensorPair<Scalar>) -> Tensor<Scalar> {
        conv(upsample(input.lhs) + input.rhs)
    }
}

struct Head<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv: Conv2D<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        self.conv = Conv2D(filterShape: (1, 1, inChannels, outChannels), padding: .same)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        conv(input)
    }
}

struct UNet<Scalar: TensorFlowFloatingPoint>: Layer {
    var adapter: DoubleConv<Scalar>
    var down1, down2, down3, down4: Down<Scalar>
    var up1, up2, up3, up4: Up<Scalar>
    var head: Head<Scalar>

    init(_ inChannels: Int = 3, _ outChannels: Int = 1) {
        adapter = DoubleConv(inChannels, 64)
        down1 = Down(64, 128)
        down2 = Down(128, 256)
        down3 = Down(256, 512)
        down4 = Down(512, 1024)
        up1 = Up(1024, 512)
        up2 = Up(512, 256)
        up3 = Up(256, 128)
        up4 = Up(128, 64)
        head = Head(64, outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        let d0 = adapter(input)

        // Encoder: progressively downsample.
        let d1 = down1(d0)
        let d2 = down2(d1)
        let d3 = down3(d2)
        let d4 = down4(d3)

        // Decoder: upsample and fuse each stage with its skip connection.
        let u1 = up1(TensorPair(d4, d3))
        let u2 = up2(TensorPair(u1, d2))
        let u3 = up3(TensorPair(u2, d1))
        let u4 = up4(TensorPair(u3, d0))

        let output = head(u4)

        return output
    }
}

The problem is that the Up layer must accept two inputs (the input and the shortcut connection), but callAsFunction() accepts only a single argument, so I created a Differentiable struct to bundle the two tensors. However, this leads to an error:

error: <Cell 11>:32:22: error: expression is not differentiable
        let u4 = up4(TensorPair(u3, d0))
                     ^

<Cell 11>:32:22: note: cannot differentiate functions that have not been marked '@differentiable' and that are defined in other files
        let u4 = up4(TensorPair(u3, d0))

The same error occurs for the other three Up layers.
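
One plausible reading of the note (an assumption, not a fix confirmed in this thread): in a notebook, every cell compiles as a separate file, so if TensorPair is defined in a different cell than UNet, the only cross-file function being differentiated here is its initializer, which is not marked @differentiable. Annotating the initializer might resolve the error; a minimal sketch:

import TensorFlow

struct TensorPair<Scalar: TensorFlowFloatingPoint>: AdditiveArithmetic, Differentiable {
    var lhs: Tensor<Scalar>
    var rhs: Tensor<Scalar>

    // Sketch, not a confirmed fix: @differentiable makes the initializer's
    // derivative visible across file (cell) boundaries.
    @differentiable
    init(_ lhs: Tensor<Scalar>, _ rhs: Tensor<Scalar>) {
        self.lhs = lhs
        self.rhs = rhs
    }
}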

  • Any idea on how to solve this?
  • Is there a better alternative for writing layers that accept more than one input? (One possibility is sketched after this list.)
  • Will the Layer protocol support a callAsFunction() requirement with multiple inputs in the future (when generics allow that, of course), i.e. callAsFunction(_ input1: Input1, _ input2: Input2, ...) or callAsFunction(_ inputs: Input...)? I also think that the TensorPair struct should be part of the Swift for TensorFlow API, or that tuples should automatically conform to Differentiable when possible.
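
On the multi-input question, one workaround worth sketching (an assumption on my part, not an official pattern): only the outer model needs Layer conformance for training, so Up could conform to Differentiable alone and expose a two-argument @differentiable method. Differentiation requires only Differentiable, and UNet can still conform to Layer; depending on the optimizer's generic constraints, the synthesized Up.TangentVector may need additional conformances.

import TensorFlow

struct Up<Scalar: TensorFlowFloatingPoint>: Differentiable {
    var upsample: TransposedConv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        upsample = TransposedConv2D(filterShape: (2, 2, outChannels, inChannels), strides: (2, 2))
        conv = ResidualBlock(channels: outChannels)
    }

    // Two tensor arguments, no wrapper struct needed.
    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>, skip: Tensor<Scalar>) -> Tensor<Scalar> {
        conv(upsample(input) + skip)
    }
}

UNet's forward pass would then read let u1 = up1(d4, skip: d3) and so on, with no TensorPair involved.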
