This repository was archived by the owner on Mar 30, 2022. It is now read-only.
Cannot use custom Differentiable structure in a Layer input #582
Closed
Description
I'm trying to implement a UNet-like segmentation network; here is the complete definition:
```swift
import TensorFlow

struct TensorPair<Scalar: TensorFlowFloatingPoint>: AdditiveArithmetic, Differentiable {
    var lhs: Tensor<Scalar>
    var rhs: Tensor<Scalar>

    init(_ lhs: Tensor<Scalar>, _ rhs: Tensor<Scalar>) {
        self.lhs = lhs
        self.rhs = rhs
    }
}

struct ReLU<Scalar: TensorFlowFloatingPoint>: ParameterlessLayer {
    typealias TangentVector = EmptyTangentVector

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        relu(input)
    }
}

struct DoubleConv<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>
    let relu = ReLU<Scalar>()

    init(_ inChannels: Int, _ outChannels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, inChannels, outChannels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: outChannels)
        conv2 = Conv2D(filterShape: (3, 3, outChannels, outChannels), padding: .same, useBias: false)
        bn2 = BatchNorm(featureCount: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: conv1, bn1, relu, conv2, bn2, relu)
    }
}

struct ResidualBlock<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>
    var relu = ReLU<Scalar>()

    init(channels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, channels, channels), padding: .same, useBias: false)
        conv2 = Conv2D(filterShape: (3, 3, channels, channels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: channels)
        bn2 = BatchNorm(featureCount: channels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: conv1, bn1, relu, conv2, bn2, relu) + input
    }
}

struct Down<Scalar: TensorFlowFloatingPoint>: Layer {
    var downsample: Conv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        downsample = Conv2D(filterShape: (3, 3, inChannels, outChannels), strides: (2, 2), padding: .same)
        conv = ResidualBlock(channels: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: downsample, conv)
    }
}

struct Up<Scalar: TensorFlowFloatingPoint>: Layer {
    var upsample: TransposedConv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        upsample = TransposedConv2D(filterShape: (2, 2, outChannels, inChannels), strides: (2, 2))
        conv = ResidualBlock(channels: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: TensorPair<Scalar>) -> Tensor<Scalar> {
        conv(upsample(input.lhs) + input.rhs)
    }
}

struct Head<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv: Conv2D<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        self.conv = Conv2D(filterShape: (1, 1, inChannels, outChannels), padding: .same)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        conv(input)
    }
}

struct UNet<Scalar: TensorFlowFloatingPoint>: Layer {
    var adapter: DoubleConv<Scalar>
    var down1, down2, down3, down4: Down<Scalar>
    var up1, up2, up3, up4: Up<Scalar>
    var head: Head<Scalar>

    init(_ inChannels: Int = 3, _ outChannels: Int = 1) {
        adapter = DoubleConv(inChannels, 64)
        down1 = Down(64, 128)
        down2 = Down(128, 256)
        down3 = Down(256, 512)
        down4 = Down(512, 1024)
        up1 = Up(1024, 512)
        up2 = Up(512, 256)
        up3 = Up(256, 128)
        up4 = Up(128, 64)
        head = Head(64, outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        let d0 = adapter(input)
        let d1 = down1(d0)
        let d2 = down2(d1)
        let d3 = down3(d2)
        let d4 = down4(d3)
        let u1 = up1(TensorPair(d4, d3))
        let u2 = up1(TensorPair(u1, d2))
        let u3 = up1(TensorPair(u2, d1))
        let u4 = up1(TensorPair(u3, d0))
        let output = head(u4)
        return output
    }
}
```
The problem is that the `Up` layer must accept two inputs (the input and the shortcut connection), so I created a `Differentiable` struct to handle this case, as the `callAsFunction()` method only accepts one input. However, this leads to an error:
```
error: <Cell 11>:32:22: error: expression is not differentiable
let u4 = up1(TensorPair(u3, d0))
             ^
<Cell 11>:32:22: note: cannot differentiate functions that have not been marked '@differentiable' and that are defined in other files
let u4 = up1(TensorPair(u3, d0))
```
The same error is reported for the three other `Up` layers.
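For reference, the "defined in other files" note seems to point at the `TensorPair` initializer: every Jupyter cell compiles as a separate file, so the derivative of a plain `init` defined in an earlier cell is not visible to the differentiation pass. A minimal sketch of a possible workaround, under that assumption (not verified against this toolchain), is to mark the initializer `@differentiable`:

```swift
// Sketch (assumption): expose the initializer's derivative across cells/files
// by marking it @differentiable, so calls like TensorPair(u3, d0) can be
// differentiated from another cell.
struct TensorPair<Scalar: TensorFlowFloatingPoint>: AdditiveArithmetic, Differentiable {
    var lhs: Tensor<Scalar>
    var rhs: Tensor<Scalar>

    @differentiable
    init(_ lhs: Tensor<Scalar>, _ rhs: Tensor<Scalar>) {
        self.lhs = lhs
        self.rhs = rhs
    }
}
```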
- Any idea on how to solve this?
- Is there a better alternative for writing layers that accept more than one input? (One possible pattern is sketched after this list.)
- Will the `Layer` protocol support a `callAsFunction()` requirement with multiple inputs in the future (when generics allow that, of course), i.e. a `callAsFunction(_ input1: Input1, _ input2: Input2, ...)` or a `callAsFunction(_ inputs: Input...)`? I also think that the `TensorPair` struct should be part of the TensorFlow Swift API, or that tuples should automatically conform to `Differentiable` when possible.
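On the second question, one hedged alternative sketch that avoids the wrapper struct entirely: drop the `Layer` conformance for `Up` and expose a plain two-argument `@differentiable` method instead. This assumes `Up` is not needed as a `Layer` elsewhere (e.g. for `sequenced(through:)`), since `callAsFunction` is then no longer a protocol requirement and can take any number of arguments:

```swift
// Sketch (assumption): a plain Differentiable struct with a two-argument
// differentiable method, instead of a Layer taking a TensorPair.
struct Up<Scalar: TensorFlowFloatingPoint>: Differentiable {
    var upsample: TransposedConv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        upsample = TransposedConv2D(filterShape: (2, 2, outChannels, inChannels), strides: (2, 2))
        conv = ResidualBlock(channels: outChannels)
    }

    // Two inputs, no wrapper struct: both arguments and self are
    // Differentiable, so the whole method can be differentiated.
    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>, _ shortcut: Tensor<Scalar>) -> Tensor<Scalar> {
        conv(upsample(input) + shortcut)
    }
}
```

The call sites in `UNet.callAsFunction` would then read `let u1 = up1(d4, d3)`, and no `TensorPair` would need to be constructed inside the differentiable region.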