[New feature] Add transformer for language model #2813

Closed
Changes from 1 commit: add transformer
zhangxiaoli73 committed May 13, 2019
commit b903d69620f7396a45c67307b4dd675951b3eb44
@@ -38,7 +38,7 @@ class Attention[T: ClassTag](
// for self attention, InputX and InputY should be the same.
// Bias is attention bias that will be added to the result of the dot product.
val inputX = Input()
- val inputY = Input()
+ val inputY = inputX // for self-attention, keys/values reuse the query node
val inputBias = Input()

// Layers for linearly projecting the queries, keys, and values.
@@ -70,7 +70,7 @@ class Attention[T: ClassTag](
// Run the combined outputs through another linear projection layer.
val outputLayer = TransformerOperation.dense(
hiddenSize, hiddenSize, false, name = "output_transform").inputs(combineHeads)
- val graph = Graph(Array(inputX, inputY, inputBias), Array(outputLayer))
+ val graph = Graph(Array(inputX, inputBias), Array(outputLayer))
if (this.train) graph.training() else graph.evaluate()
graph
}
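With this change the attention graph declares two inputs instead of three: for self-attention the keys and values reuse the query node, so callers pass the input tensor once. A minimal sketch of the new calling convention, not part of the commit, assuming the constructor parameters (hiddenSize, numHeads, attentionDropout) shown in this file and illustrative shapes:

// A minimal sketch, not part of the commit. Assumes an Attention factory
// taking (hiddenSize, numHeads, attentionDropout); shapes are illustrative.
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.T

val attention = Attention[Float](hiddenSize = 8, numHeads = 2, attentionDropout = 0.0f)
val x = Tensor[Float](2, 5, 8).rand()       // (batch, seqLen, hiddenSize), reused as keys/values
val bias = Tensor[Float](2, 1, 5, 5).zero() // attention bias added to the dot product
val out = attention.forward(T(x, bias))     // previously the graph needed T(x, x, bias)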
@@ -19,7 +19,7 @@ package com.intel.analytics.bigdl.nn
import com.intel.analytics.bigdl.nn.abstractnn.AbstractModule
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
- import com.intel.analytics.bigdl.utils.Table
+ import com.intel.analytics.bigdl.utils.{T, Table}
import com.intel.analytics.bigdl.utils.serializer.{DeserializeContext, ModuleSerializable}

import scala.reflect._
@@ -91,13 +91,16 @@ class CAddTable[T: ClassTag, D: ClassTag](val inplace: Boolean = false)(
} else {
if (input[Tensor[D]](i).isSameSizeAs(gradOutput)) {
gradInput[Tensor[D]](i).resizeAs(gradOutput).copy(gradOutput)
- } else {
+ } else if (input[Tensor[D]](i).isScalar) {
require(input[Tensor[D]](i).isScalar, "Only support scalar broadcast backward now")
if (!calculateSum) {
sum = gradOutput.sum()
calculateSum = true
}
gradInput[Tensor[D]](i).resizeAs(input[Tensor[D]](i)).setValue(sum)
+ } else {
+ // TODO: refactor to share this code with the Analytics Zoo implementation
+ gradInput[Tensor[D]](i).resizeAs(input[Tensor[D]](i)).copy(gradOutput.sum(1).sum(2))
}
}
i += 1
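The new else branch covers a non-scalar addend whose leading dimensions were broadcast in the forward pass: each of its elements contributed to several outputs, so its gradient is gradOutput summed over the broadcast dimensions. A small sketch of that reduction with BigDL tensors (shapes illustrative; the committed code hardcodes dimensions 1 and 2, which is what the TODO flags):

// A minimal sketch, not part of the commit.
import com.intel.analytics.bigdl.tensor.Tensor

val gradOutput = Tensor[Float](2, 3, 4).rand() // full-size gradient
// sum(dim) keeps the summed dimension with size 1, so two reductions
// yield a (1, 1, 4) tensor matching an addend that was broadcast
// along its first two dimensions.
val reduced = gradOutput.sum(1).sum(2)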
@@ -36,12 +36,13 @@ class FeedForwardNetwork[T: ClassTag](val hiddenSize: Int, val filterSize: Int,
override def buildModel(): Module[T] = {
val input = Input()
val filterLayer = TransformerOperation.dense(
- hiddenSize, filterSize, bias = true, activation = ReLU[T]()).inputs(input)
+ hiddenSize, filterSize, bias = true, activation = ReLU[T](),
+ name = "filter_layer").inputs(input)
val drop = if (train) {
Dropout(initP = (1.0 - reluDropout)).inputs(filterLayer)
} else filterLayer
val output_dense_layer = TransformerOperation.dense(
- filterSize, hiddenSize, bias = true).inputs(drop)
+ filterSize, hiddenSize, bias = true, name = "output_layer").inputs(drop)
val graph = Graph(Array(input), Array(output_dense_layer))
if (this.train) graph.training() else graph.evaluate()
graph
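Naming the two dense layers makes their parameters addressable by name, for example when mapping pretrained transformer checkpoints onto this graph. A construction sketch, not part of the commit, assuming the third constructor parameter is the ReLU dropout probability used above (shapes illustrative):

// A minimal sketch, not part of the commit.
import com.intel.analytics.bigdl.tensor.Tensor

val ffn = new FeedForwardNetwork[Float](hiddenSize = 8, filterSize = 32, reluDropout = 0.1f)
val out = ffn.forward(Tensor[Float](2, 5, 8).rand()) // projects 8 -> 32 -> 8, shape preserved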
@@ -0,0 +1,62 @@
/*
* Copyright 2018 Analytics Zoo Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.intel.analytics.bigdl.nn

import com.intel.analytics.bigdl.nn.abstractnn.AbstractModule
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.utils.{T, Table}

import scala.reflect.ClassTag

/**
 * A CSubTable whose second input is expanded to the size of its first input
 * before subtraction, so a smaller tensor with singleton dimensions can be
 * broadcast against a larger one.
 */
class InternalCSubTable[T: ClassTag](expandDim: Int = 1)
  (implicit ev: TensorNumeric[T]) extends CSubTable[T] {
  private var expandLayer: AbstractModule[Tensor[T], Tensor[T], T] = null

  override def updateOutput(input: Table): Tensor[_] = {
    val input1 = input[Tensor[T]](1)
    val input2 = input[Tensor[T]](2)

    // Broadcast the second input to the shape of the first, then subtract.
    expandLayer = InternalExpand(input1.size())
    val input3 = expandLayer.forward(input2)

    output = super.updateOutput(T(input1, input3))
    output
  }

  override def updateGradInput(input: Table, gradOutput: Tensor[_]): Table = {
    val input1 = input[Tensor[T]](1)
    val input2 = input[Tensor[T]](2)

    expandLayer = InternalExpand(input1.size())
    val input3 = expandLayer.forward(input2)

    // The gradient w.r.t. the expanded input is reduced back to input2's
    // shape by InternalExpand's backward pass.
    gradInput = super.updateGradInput(T(input1, input3), gradOutput)
    gradInput(2) = expandLayer.backward(input2, gradInput[Tensor[T]](2))
    gradInput
  }

  override def toString: String = s"InternalCSubTable()"
}

object InternalCSubTable {
  def apply[@specialized(Float, Double) T: ClassTag]()
    (implicit ev: TensorNumeric[T]): InternalCSubTable[T] = {
    new InternalCSubTable[T]()
  }
}
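A usage sketch (shapes illustrative). InternalExpand requires both inputs to have the same number of dimensions; the second input carries size 1 on the dimensions it broadcasts over:

// A minimal sketch, not part of the commit.
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.T

val sub = InternalCSubTable[Float]()
val a = Tensor[Float](2, 3, 4).rand()
val b = Tensor[Float](2, 1, 4).rand() // expanded to (2, 3, 4) before subtracting
val out = sub.forward(T(a, b))        // element-wise a - b, shape (2, 3, 4)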
@@ -0,0 +1,102 @@
/*
* Copyright 2018 Analytics Zoo Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.intel.analytics.bigdl.nn

import com.intel.analytics.bigdl.nn.abstractnn.AbstractModule
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

/**
 * Expands a tensor to the configured size. A singleton dimension is "expanded"
 * by setting its stride to 0, so the result is a broadcast view over the
 * original storage and no data is copied.
 * @param tgtSizes target sizes; a dimension whose target size is -1 is left unchanged
 * @tparam T Numeric type of parameter (e.g. weight, bias). Only Float/Double are supported.
 */
class InternalExpand[T: ClassTag](tgtSizes: Array[Int])
  (implicit ev: TensorNumeric[T]) extends AbstractModule[Tensor[T], Tensor[T], T] {

  override def updateOutput(input: Tensor[T]): Tensor[T] = {
    require(tgtSizes.length == input.dim(),
      s"the length of tgtSizes (${tgtSizes.length}) must equal the input's " +
        s"number of dimensions (${input.dim()})")
    val tensorDim = input.dim()
    val tensorStride = input.stride()
    val tensorSize = input.size()

    var i = 0
    while (i < tensorDim) {
      if (tgtSizes(i) != -1) {
        if (tensorSize(i) == 1) {
          // Expand a singleton dimension: grow its size and read the same
          // element for every index along it by using stride 0.
          tensorSize(i) = tgtSizes(i)
          tensorStride(i) = 0
        } else if (tensorSize(i) != tgtSizes(i)) {
          throw new UnsupportedOperationException(
            "incorrect size: only supporting singleton expansion (size=1)")
        }
      }
      i += 1
    }

    // A view over the input's storage; nothing is copied.
    output.set(input.storage(), input.storageOffset(), tensorSize, tensorStride)
    output
  }

  override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
    val tensorDim = input.dim()
    val tensorSize = input.size()

    // Collect the (1-based) dimensions that were broadcast in the forward pass.
    val expandDim = new ArrayBuffer[Int]()
    var i = 0
    while (i < tensorDim) {
      if (tgtSizes(i) != -1 && tensorSize(i) == 1 && tgtSizes(i) != 1) {
        expandDim.append(i + 1)
      }
      i += 1
    }

    // Each broadcast dimension fanned a single input element out to many
    // outputs, so its gradient is the sum of gradOutput over that dimension.
    i = expandDim.size - 1
    val sizes = gradOutput.size()
    var _gradOutput = gradOutput
    while (i >= 0) {
      var start = 1
      sizes(expandDim(i) - 1) = 1
      val _gradInput = Tensor[T](sizes)
      while (start <= _gradOutput.size(expandDim(i))) {
        _gradInput.add(_gradOutput.narrow(expandDim(i), start, 1))
        start += 1
      }
      _gradOutput = _gradInput
      i -= 1
    }
    gradInput = _gradOutput
    gradInput
  }

  override def toString: String = s"InternalExpand()"
}

object InternalExpand {
  def apply[@specialized(Float, Double) T: ClassTag](tgtSizes: Array[Int])
    (implicit ev: TensorNumeric[T]): InternalExpand[T] = {
    new InternalExpand[T](tgtSizes)
  }
}
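A quick sketch of the zero-stride forward view and the matching backward reduction (shapes illustrative):

// A minimal sketch, not part of the commit.
import com.intel.analytics.bigdl.tensor.Tensor

val expand = InternalExpand[Float](Array(2, 3, 4))
val x = Tensor[Float](2, 1, 4).rand()
val y = expand.forward(x)            // (2, 3, 4) view; dim 2 has stride 0
val gradIn = expand.backward(x, Tensor[Float](2, 3, 4).rand())
// gradIn has shape (2, 1, 4): the gradient summed over the expanded dim 2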
@@ -0,0 +1,62 @@
/*
* Copyright 2018 Analytics Zoo Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.intel.analytics.bigdl.nn

import com.intel.analytics.bigdl.nn.abstractnn.AbstractModule
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.utils.{T, Table}

import scala.reflect.ClassTag

/**
 * A CMulTable whose second input is expanded to the size of its first input
 * before multiplication, so a smaller tensor with singleton dimensions can be
 * broadcast against a larger one.
 */
class InternalCMulTable[T: ClassTag](expandDim: Int = 1)
  (implicit ev: TensorNumeric[T]) extends CMulTable[T] {
  private var expandLayer: AbstractModule[Tensor[T], Tensor[T], T] = null

  override def updateOutput(input: Table): Tensor[T] = {
    val input1 = input[Tensor[T]](1)
    val input2 = input[Tensor[T]](2)

    // Broadcast the second input to the shape of the first, then multiply.
    expandLayer = InternalExpand(input1.size())
    val input3 = expandLayer.forward(input2)

    output = super.updateOutput(T(input1, input3))
    output
  }

  override def updateGradInput(input: Table, gradOutput: Tensor[T]): Table = {
    val input1 = input[Tensor[T]](1)
    val input2 = input[Tensor[T]](2)

    expandLayer = InternalExpand(input1.size())
    val input3 = expandLayer.forward(input2)

    // Reduce the expanded input's gradient back to input2's shape.
    gradInput = super.updateGradInput(T(input1, input3), gradOutput)
    gradInput(2) = expandLayer.backward(input2, gradInput[Tensor[T]](2))
    gradInput
  }

  override def toString: String = s"InternalCMulTable()"
}

object InternalCMulTable {
  def apply[@specialized(Float, Double) T: ClassTag]()
    (implicit ev: TensorNumeric[T]): InternalCMulTable[T] = {
    new InternalCMulTable[T]()
  }
}
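Usage mirrors InternalCSubTable; a sketch with illustrative shapes:

// A minimal sketch, not part of the commit.
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.T

val mul = InternalCMulTable[Float]()
val a = Tensor[Float](2, 3, 4).rand()
val scale = Tensor[Float](2, 3, 1).rand() // broadcast along the last dimension
val out = mul.forward(T(a, scale))        // element-wise a * scale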