diff --git a/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIterator.java b/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIterator.java
index c96285b373c6..82d5ca3feff9 100644
--- a/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIterator.java
+++ b/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIterator.java
@@ -31,7 +31,6 @@
import org.datavec.api.util.ndarray.RecordConverter;
import org.datavec.api.writable.NDArrayWritable;
import org.datavec.api.writable.Writable;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.datasets.datavec.exception.ZeroLengthSequenceException;
import org.deeplearning4j.exception.DL4JException;
import org.nd4j.linalg.api.ndarray.INDArray;
@@ -41,6 +40,7 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.primitives.Pair;
import java.io.IOException;
import java.io.Serializable;
diff --git a/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/iterator/impl/TinyImageNetDataSetIterator.java b/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/iterator/impl/TinyImageNetDataSetIterator.java
index 6a8d41398927..9dda65428e75 100644
--- a/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/iterator/impl/TinyImageNetDataSetIterator.java
+++ b/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/iterator/impl/TinyImageNetDataSetIterator.java
@@ -19,21 +19,12 @@
package org.deeplearning4j.datasets.iterator.impl;
import lombok.Getter;
-import org.datavec.api.io.labels.ParentPathLabelGenerator;
-import org.datavec.api.io.labels.PathLabelGenerator;
-import org.datavec.api.io.labels.PatternPathLabelGenerator;
-import org.datavec.image.recordreader.ImageRecordReader;
import org.datavec.image.transform.ImageTransform;
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
-import org.deeplearning4j.datasets.fetchers.*;
-import org.deeplearning4j.datasets.iterator.BaseDatasetIterator;
+import org.deeplearning4j.datasets.fetchers.DataSetType;
+import org.deeplearning4j.datasets.fetchers.TinyImageNetFetcher;
import org.nd4j.linalg.dataset.api.DataSetPreProcessor;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-
/**
* Tiny ImageNet is a subset of the ImageNet database. TinyImageNet is the default course challenge for CS231n
* at Stanford University.
diff --git a/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java b/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java
index ab65e9b05aae..c59db1163db6 100644
--- a/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java
+++ b/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java
@@ -23,7 +23,6 @@
import com.google.common.util.concurrent.AtomicDouble;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.math3.util.FastMath;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.clustering.sptree.DataPoint;
import org.deeplearning4j.clustering.sptree.SpTree;
import org.deeplearning4j.clustering.vptree.VPTree;
@@ -39,6 +38,7 @@
import org.nd4j.linalg.indexing.conditions.Conditions;
import org.nd4j.linalg.indexing.functions.Value;
import org.nd4j.linalg.learning.legacy.AdaGrad;
+import org.nd4j.linalg.primitives.Pair;
import java.io.BufferedWriter;
import java.io.File;
@@ -671,11 +671,6 @@ public void setBackpropGradientsViewArray(INDArray gradients) {
throw new UnsupportedOperationException();
}
- @Override
- public void applyLearningRateScoreDecay() {
- throw new UnsupportedOperationException("Not yet implemented");
- }
-
@Override
public void fit(INDArray data) {
this.x = data;
diff --git a/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/Tsne.java b/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/Tsne.java
index b5581e7f0a29..5fd36bddb617 100644
--- a/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/Tsne.java
+++ b/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/Tsne.java
@@ -2,7 +2,6 @@
import com.google.common.primitives.Ints;
import org.apache.commons.math3.util.FastMath;
-import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dimensionalityreduction.PCA;
import org.nd4j.linalg.factory.Nd4j;
@@ -12,6 +11,7 @@
import org.nd4j.linalg.indexing.conditions.Conditions;
import org.nd4j.linalg.indexing.functions.Value;
import org.nd4j.linalg.learning.legacy.AdaGrad;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.util.ArrayUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/TestUtils.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/TestUtils.java
new file mode 100644
index 000000000000..bde9eb28a806
--- /dev/null
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/TestUtils.java
@@ -0,0 +1,55 @@
+package org.deeplearning4j;
+
+import org.deeplearning4j.nn.graph.ComputationGraph;
+import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
+import org.deeplearning4j.util.ModelSerializer;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestUtils {
+
+ public static MultiLayerNetwork testModelSerialization(MultiLayerNetwork net){
+
+ try {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ ModelSerializer.writeModel(net, baos, true);
+ byte[] bytes = baos.toByteArray();
+
+ ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
+ MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
+
+ assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations());
+ assertEquals(net.params(), restored.params());
+
+ return restored;
+ } catch (IOException e){
+ //Should never happen
+ throw new RuntimeException(e);
+ }
+ }
+
+ public static ComputationGraph testModelSerialization(ComputationGraph net){
+
+ try {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ ModelSerializer.writeModel(net, baos, true);
+ byte[] bytes = baos.toByteArray();
+
+ ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
+ ComputationGraph restored = ModelSerializer.restoreComputationGraph(bais, true);
+
+ assertEquals(net.getConfiguration(), restored.getConfiguration());
+ assertEquals(net.params(), restored.params());
+
+ return restored;
+ } catch (IOException e){
+ //Should never happen
+ throw new RuntimeException(e);
+ }
+ }
+
+}
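
The new TestUtils class centralizes the write/restore/compare cycle that many of the updated tests need. A minimal usage sketch, assuming the same imports the test classes below already use; the tiny configuration here is illustrative, not taken from this patch:

```java
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .updater(new Sgd(0.1)).weightInit(WeightInit.XAVIER).list()
        .layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                .activation(Activation.SOFTMAX).nIn(4).nOut(3).build())
        .pretrain(false).backprop(true).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();

// Serializes to a byte array, restores, and asserts that the configuration
// and parameters survive the round trip; returns the restored network.
MultiLayerNetwork restored = TestUtils.testModelSerialization(net);
```
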
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIteratorTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIteratorTest.java
index 8ad663cfba6d..773b47184c1c 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIteratorTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIteratorTest.java
@@ -1,10 +1,10 @@
package org.deeplearning4j.datasets.iterator;
import org.apache.commons.lang3.RandomUtils;
-import org.nd4j.linalg.primitives.Pair;
import org.junit.Test;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Iterator;
import java.util.concurrent.atomic.AtomicInteger;
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/MultipleEpochsIteratorTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/MultipleEpochsIteratorTest.java
index 44a1b2ce2dc5..9cfe3f3b783e 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/MultipleEpochsIteratorTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/MultipleEpochsIteratorTest.java
@@ -98,7 +98,7 @@ public void testLoadBatchDataSet() throws Exception {
public void testCifarDataSetIteratorReset() {
int epochs = 2;
Nd4j.getRandom().setSeed(12345);
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.weightInit(WeightInit.XAVIER).seed(12345L).list()
.layer(0, new DenseLayer.Builder().nIn(400).nOut(50).activation(Activation.RELU).build())
.layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java
index 291af43ba330..c8763ccea273 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java
@@ -15,7 +15,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.BaseLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
@@ -29,6 +28,8 @@
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Nesterovs;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -47,7 +48,7 @@ public class TestEarlyStopping {
public void testEarlyStoppingIris() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list()
+ .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.pretrain(false).backprop(true).build();
@@ -90,7 +91,7 @@ public void testEarlyStoppingIris() {
public void testEarlyStoppingEveryNEpoch() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list()
+ .updater(new Sgd(0.01)).weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.pretrain(false).backprop(true).build();
@@ -118,7 +119,7 @@ public void testEarlyStoppingEveryNEpoch() {
public void testEarlyStoppingIrisMultiEpoch() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list()
+ .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.pretrain(false).backprop(true).build();
@@ -166,7 +167,7 @@ public void testBadTuning() {
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(5.0) //Intentionally huge LR
+ .updater(new Sgd(5.0)) //Intentionally huge LR
.weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
@@ -205,7 +206,7 @@ public void testTimeTermination() {
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(1e-6).weightInit(WeightInit.XAVIER).list()
+ .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.pretrain(false).backprop(true).build();
@@ -247,7 +248,7 @@ public void testNoImprovementNEpochsTermination() {
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(0.0).weightInit(WeightInit.XAVIER).list()
+ .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.pretrain(false).backprop(true).build();
@@ -285,8 +286,8 @@ public void testMinImprovementNEpochsTermination() {
Random rng = new Random(123);
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123).iterations(10)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.0)
- .updater(Updater.NESTEROVS).momentum(0.9).list()
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
+ .updater(new Nesterovs(0.0,0.9)).list()
.layer(0, new DenseLayer.Builder().nIn(1).nOut(20)
.weightInit(WeightInit.XAVIER).activation(
Activation.TANH)
@@ -333,7 +334,7 @@ public void testMinImprovementNEpochsTermination() {
public void testEarlyStoppingGetBestModel() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list()
+ .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.pretrain(false).backprop(true).build();
@@ -364,14 +365,14 @@ public void testEarlyStoppingGetBestModel() {
assertEquals(net.conf().getOptimizationAlgo(), mln.conf().getOptimizationAlgo());
BaseLayer bl = (BaseLayer) net.conf().getLayer();
assertEquals(bl.getActivationFn().toString(), ((BaseLayer) mln.conf().getLayer()).getActivationFn().toString());
- assertEquals(bl.getUpdater(), ((BaseLayer) mln.conf().getLayer()).getUpdater());
+ assertEquals(bl.getIUpdater(), ((BaseLayer) mln.conf().getLayer()).getIUpdater());
}
@Test
public void testListeners() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list()
+ .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.pretrain(false).backprop(true).build();
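
The recurring change in this file (and in the files that follow) is the move away from the Updater enum: the learning rate and momentum no longer live on the NeuralNetConfiguration builder but on the IUpdater instance itself. Schematically, using the exact substitution the hunks above make:

```java
// Before: updater chosen by enum, hyperparameters set separately on the builder
new NeuralNetConfiguration.Builder()
        .updater(Updater.NESTEROVS).learningRate(0.1).momentum(0.9);

// After: the IUpdater instance carries its own hyperparameters
new NeuralNetConfiguration.Builder()
        .updater(new Nesterovs(0.1, 0.9));
```
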
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java
index 90d7dc9902ff..b700e0b14a18 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java
@@ -31,7 +31,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.weights.WeightInit;
@@ -40,6 +39,7 @@
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -55,7 +55,7 @@ public class TestEarlyStoppingCompGraph {
public void testEarlyStoppingIris() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
+ .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
.addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in")
.setOutputs("0").pretrain(false).backprop(true).build();
@@ -98,7 +98,7 @@ public void testBadTuning() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(5.0) //Intentionally huge LR
+ .updater(new Sgd(5.0)) //Intentionally huge LR
.weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
.addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in")
@@ -134,7 +134,7 @@ public void testTimeTermination() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(1e-6).weightInit(WeightInit.XAVIER).graphBuilder()
+ .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).graphBuilder()
.addInputs("in")
.addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in")
@@ -175,7 +175,7 @@ public void testNoImprovementNEpochsTermination() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(0.0).weightInit(WeightInit.XAVIER).graphBuilder()
+ .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).graphBuilder()
.addInputs("in")
.addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in")
@@ -209,7 +209,7 @@ public void testNoImprovementNEpochsTermination() {
public void testListeners() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
+ .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
.addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in")
.setOutputs("0").pretrain(false).backprop(true).build();
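
The same substitution applies to ComputationGraph configurations. Assembled end to end, with the construction and init steps the hunks above do not show, the migrated pattern looks like this:

```java
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
        .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER)
        .graphBuilder().addInputs("in")
        .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3)
                .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in")
        .setOutputs("0").pretrain(false).backprop(true).build();
ComputationGraph net = new ComputationGraph(conf);
net.init();
```
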
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/eval/EvalTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/eval/EvalTest.java
index 12f7c58a15df..40e0419ac882 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/eval/EvalTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/eval/EvalTest.java
@@ -29,7 +29,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@@ -47,6 +46,7 @@
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.io.ClassPathResource;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.util.FeatureUtil;
@@ -179,7 +179,7 @@ public void testIris() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(1).seed(42)
- .learningRate(1e-6).list()
+ .updater(new Sgd(1e-6)).list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(2).activation(Activation.TANH)
.weightInit(WeightInit.XAVIER).build())
.layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
@@ -623,8 +623,8 @@ public void testEvaluationWithMetaData() throws Exception {
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).iterations(1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
- .learningRate(0.1).list()
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1))
+ .list()
.layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(4).nOut(3).build())
.pretrain(false).backprop(true).build();
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java
index ceabfd54382b..3db596ce5f2e 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java
@@ -5,7 +5,6 @@
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
@@ -23,6 +22,7 @@
import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.Random;
@@ -54,8 +54,8 @@ public void testGradient2dSimple() {
INDArray labels = ds.getLabels();
MultiLayerConfiguration.Builder builder =
- new NeuralNetConfiguration.Builder().learningRate(1.0).regularization(false)
- .updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
+ new NeuralNetConfiguration.Builder().updater(new NoOp())
+ .seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1)).list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(3)
.activation(Activation.IDENTITY).build())
@@ -93,8 +93,8 @@ public void testGradientCnnSimple() {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
- MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0)
- .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp()).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
.activation(Activation.IDENTITY).build())
@@ -157,9 +157,9 @@ public void testGradientBNWithCNNandSubsampling() {
Activation outputActivation = outputActivations[i];
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(12345)
- .regularization(l1vals[j] > 0 || l2vals[j] > 0).l1(l1vals[j]).l2(l2vals[j])
+ .l2(l2vals[j])
.optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
- .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION)
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-2, 2)).seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3)
.activation(afn).build())
@@ -256,10 +256,9 @@ public void testGradientDense() {
MultiLayerConfiguration.Builder builder =
new NeuralNetConfiguration.Builder()
- .regularization(l1vals[j] > 0 || l2vals[j] > 0).l1(l1vals[j])
.l2(l2vals[j])
.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
- .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION)
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-2, 2)).seed(12345L).list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(4)
.activation(afn).build())
@@ -325,8 +324,8 @@ public void testGradient2dFixedGammaBeta() {
INDArray input = ds.getFeatureMatrix();
INDArray labels = ds.getLabels();
- MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0)
- .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
+ .seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1)).list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.IDENTITY).build())
.layer(1, new BatchNormalization.Builder().lockGammaBeta(true).gamma(2.0).beta(0.5).nOut(3)
@@ -364,8 +363,8 @@ public void testGradientCnnFixedGammaBeta() {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
- MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0)
- .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
+ .seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
.activation(Activation.IDENTITY).build())
@@ -400,8 +399,8 @@ public void testBatchNormCompGraphSimple() {
int minibatchSize = 3;
- ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).updater(Updater.NONE)
- .weightInit(WeightInit.XAVIER).regularization(false).graphBuilder().addInputs("in")
+ ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).updater(new NoOp())
+ .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
.setInputTypes(InputType.convolutional(height, width, channels))
.addLayer("bn", new BatchNormalization.Builder().build(), "in")
.addLayer("out", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT)
@@ -465,9 +464,8 @@ public void testGradientBNWithCNNandSubsamplingCompGraph() {
Activation outputActivation = outputActivations[i];
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
- .regularization(l1vals[j] > 0 || l2vals[j] > 0).l1(l1vals[j]).l2(l2vals[j])
.optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
- .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION)
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-2, 2)).seed(12345L).graphBuilder()
.addInputs("in")
.addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3)
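
Gradient checks need the raw analytic gradient, so the old configs paired Updater.NONE with learningRate(1.0) to neutralize the update step. The NoOp updater expresses that intent in a single call:

```java
// Before: two settings that together meant "do not modify the gradient"
new NeuralNetConfiguration.Builder().learningRate(1.0).updater(Updater.NONE);

// After: a NoOp IUpdater leaves the gradient untouched, which is
// exactly what a finite-difference gradient check requires
new NeuralNetConfiguration.Builder().updater(new NoOp());
```
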
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java
index c51d1782c9b1..5c83a0f46176 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java
@@ -3,7 +3,6 @@
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
@@ -15,6 +14,7 @@
import org.nd4j.linalg.api.buffer.util.DataTypeUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import static org.junit.Assert.assertEquals;
@@ -69,7 +69,7 @@ public void testCnn1DWithZeroPadding1D() {
}
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .learningRate(1.0).updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION)
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1)).convolutionMode(ConvolutionMode.Same).list()
.layer(new Convolution1DLayer.Builder().activation(afn).kernelSize(kernel)
.stride(stride).padding(padding).nIn(convNIn).nOut(convNOut1)
@@ -145,8 +145,8 @@ public void testCnn1DWithSubsampling1D() {
}
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false)
- .learningRate(1.0).updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1)).convolutionMode(ConvolutionMode.Same).list()
.layer(0, new Convolution1DLayer.Builder().activation(afn).kernelSize(kernel)
.stride(stride).padding(padding).nIn(convNIn).nOut(convNOut1)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java
index 2c2165e73148..1263ed9c7adc 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java
@@ -5,7 +5,6 @@
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
@@ -18,6 +17,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.Arrays;
@@ -44,26 +44,26 @@ public void testGradientCNNMLN() {
// (a) activation function
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
// (c) Loss function (with specified output activations)
- String[] activFns = {"sigmoid", "tanh"};
+ Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
LossFunctions.LossFunction[] lossFunctions =
{LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
- String[] outputActivations = {"softmax", "tanh"}; //i.e., lossFunctions[i] used with outputActivations[i] here
+ Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
DataSet ds = new IrisDataSetIterator(150, 150).next();
ds.normalizeZeroMeanZeroUnitVariance();
INDArray input = ds.getFeatureMatrix();
INDArray labels = ds.getLabels();
- for (String afn : activFns) {
+ for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
LossFunctions.LossFunction lf = lossFunctions[i];
- String outputActivation = outputActivations[i];
+ Activation outputActivation = outputActivations[i];
- MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().regularization(false)
- .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(Updater.NONE)
+ MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
+ .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp())
.weightInit(WeightInit.XAVIER).seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn).build())
.layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build())
@@ -117,12 +117,12 @@ public void testGradientCNNL1L2MLN() {
// (a) activation function
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
// (c) Loss function (with specified output activations)
- String[] activFns = {"sigmoid", "tanh"};
+ Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
LossFunctions.LossFunction[] lossFunctions =
{LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
- String[] outputActivations = {"softmax", "tanh"}; //i.e., lossFunctions[i] used with outputActivations[i] here
+ Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
DataSet ds = new IrisDataSetIterator(150, 150).next();
ds.normalizeZeroMeanZeroUnitVariance();
@@ -135,25 +135,25 @@ public void testGradientCNNL1L2MLN() {
double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
- for (String afn : activFns) {
+ for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunctions.LossFunction lf = lossFunctions[i];
- String outputActivation = outputActivations[i];
+ Activation outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
- .regularization(true).l2(l2).l1(l1).l2Bias(biasL2[k]).l1Bias(biasL1[k])
+ .l2(l2).l1(l1).l2Bias(biasL2[k]).l1Bias(biasL1[k])
.optimizationAlgo(
OptimizationAlgorithm.CONJUGATE_GRADIENT)
.seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder(new int[] {1, 1}).nIn(1).nOut(6)
.weightInit(WeightInit.XAVIER).activation(afn)
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
- .weightInit(WeightInit.XAVIER).updater(Updater.NONE).build())
+ .weightInit(WeightInit.XAVIER).updater(new NoOp()).build())
.pretrain(false).backprop(true)
.setInputType(InputType.convolutionalFlat(1, 4, 1));
@@ -230,8 +230,8 @@ public void testCnnWithUpsampling() {
}
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0)
- .updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION)
+ new NeuralNetConfiguration.Builder()
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1))
.list().layer(new ConvolutionLayer.Builder(kernel,
stride, padding).nIn(inputDepth)
@@ -281,12 +281,12 @@ public void testCnnWithSubsampling() {
int[] padding = {0, 0};
int pnorm = 2;
- String[] activations = {"sigmoid", "tanh"};
+ Activation[] activations = {Activation.SIGMOID, Activation.TANH};
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM};
- for (String afn : activations) {
+ for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
@@ -296,8 +296,8 @@ public void testCnnWithSubsampling() {
}
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0)
- .updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION)
+ new NeuralNetConfiguration.Builder().updater(new NoOp())
+ .weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1))
.list().layer(0,
new ConvolutionLayer.Builder(kernel,
@@ -348,12 +348,12 @@ public void testCnnWithSubsamplingV2() {
int[] padding = {0, 0};
int pNorm = 3;
- String[] activations = {"sigmoid", "tanh"};
+ Activation[] activations = {Activation.SIGMOID, Activation.TANH};
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM};
- for (String afn : activations) {
+ for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
@@ -363,8 +363,7 @@ public void testCnnWithSubsamplingV2() {
}
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0)
- .updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION)
+ new NeuralNetConfiguration.Builder().updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1))
.list().layer(0,
new ConvolutionLayer.Builder(kernel,
@@ -407,14 +406,14 @@ public void testCnnMultiLayer() {
int height = 5;
int[] inputDepths = {1, 2, 4};
- String[] activations = {"sigmoid", "tanh"};
+ Activation[] activations = {Activation.SIGMOID, Activation.TANH};
SubsamplingLayer.PoolingType[] poolingTypes = new SubsamplingLayer.PoolingType[] {
SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG};
Nd4j.getRandom().setSeed(12345);
for (int inputDepth : inputDepths) {
- for (String afn : activations) {
+ for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
@@ -423,8 +422,8 @@ public void testCnnMultiLayer() {
labels.putScalar(new int[] {i, i % nOut}, 1.0);
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
- .regularization(false).learningRate(1.0).updater(Updater.SGD).activation(afn)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new NoOp())
+ .activation(afn)
.list()
.layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1)
.padding(0, 0).nIn(inputDepth).nOut(2).build())//output: (5-2+0)/1+1 = 4
@@ -486,7 +485,7 @@ public void testCnnSamePaddingMode() {
}
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
- .regularization(false).learningRate(1.0).updater(Updater.SGD)
+ .updater(new NoOp())
.activation(Activation.TANH).convolutionMode(ConvolutionMode.Same).list()
.layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k)
.stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build())
@@ -553,7 +552,7 @@ public void testCnnSamePaddingModeStrided() {
.stride(stride, stride).padding(0, 0).build();
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
- .regularization(false).learningRate(1.0).updater(Updater.SGD)
+ .updater(new NoOp())
.activation(Activation.TANH).convolutionMode(ConvolutionMode.Same).list()
.layer(0, convFirst ? convLayer : poolLayer)
.layer(1, convFirst ? poolLayer : convLayer)
@@ -613,8 +612,7 @@ public void testCnnZeroPaddingLayer() {
for (int[] zeroPad : zeroPadLayer) {
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0)
- .updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION)
+ new NeuralNetConfiguration.Builder().updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1)).list()
.layer(0, new ConvolutionLayer.Builder(kernel, stride, padding)
.nIn(inputDepth).nOut(3).build())//output: (6-2+0)/1+1 = 5
@@ -688,7 +686,7 @@ public void testCnnDilated() {
}
NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345)
- .learningRate(1.0).updater(Updater.SGD)
+ .updater(new NoOp())
.activation(Activation.TANH).convolutionMode(cm).list()
.layer(new ConvolutionLayer.Builder().name("layer 0")
.kernelSize(k, k)
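
Alongside the updater change, these tests replace String activation names with the Activation enum, so an invalid name fails at compile time instead of at configuration time. The pattern, schematically (the string overload is the pre-migration API shown in the minus lines above):

```java
// Before: activation functions referenced by string name
new DenseLayer.Builder().nIn(4).nOut(3).activation("tanh").build();

// After: type-safe enum constants
new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH).build();
```
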
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java
index 275a36bd6185..ce7280ce6739 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java
@@ -3,7 +3,6 @@
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
@@ -15,6 +14,7 @@
import org.nd4j.linalg.api.buffer.util.DataTypeUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.Random;
@@ -53,8 +53,8 @@ public void testLSTMGlobalPoolingBasicMultiLayer() {
for (int miniBatchSize : minibatchSizes) {
for (PoolingType pt : poolingTypes) {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false)
- .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
.build())
@@ -116,8 +116,8 @@ public void testCnnGlobalPoolingBasicMultiLayer() {
for (int miniBatchSize : minibatchSizes) {
for (PoolingType pt : poolingTypes) {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false)
- .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(layerDepth)
.build())
@@ -171,8 +171,8 @@ public void testLSTMWithMasking() {
for (PoolingType pt : poolingTypes) {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false)
- .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
.build())
@@ -255,8 +255,8 @@ public void testCnnGlobalPoolingMasking() {
stride = new int[] {inputH, 1};
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false)
- .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1.0)).convolutionMode(ConvolutionMode.Same)
.seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder().kernelSize(kernel).stride(stride)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java
index d6390e6b526f..2d1af110e3c3 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java
@@ -4,7 +4,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@@ -20,6 +19,7 @@
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
import java.util.Random;
@@ -48,11 +48,11 @@ public void testGradientMLP2LayerIrisSimple() {
// (a) activation function
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
// (c) Loss function (with specified output activations)
- String[] activFns = {"sigmoid", "tanh", "softplus"}; //activation functions such as relu and hardtanh: may randomly fail due to discontinuities
+ Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
- String[] outputActivations = {"softmax", "tanh"}; //i.e., lossFunctions[i] used with outputActivations[i] here
+ Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
DataNormalization scaler = new NormalizerMinMaxScaler();
DataSetIterator iter = new IrisDataSetIterator(150, 150);
scaler.fit(iter);
@@ -62,25 +62,24 @@ public void testGradientMLP2LayerIrisSimple() {
INDArray input = ds.getFeatureMatrix();
INDArray labels = ds.getLabels();
- for (String afn : activFns) {
+ for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
LossFunction lf = lossFunctions[i];
- String outputActivation = outputActivations[i];
+ Activation outputActivation = outputActivations[i];
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false)
- .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp())
.seed(12345L)
.list().layer(0,
new DenseLayer.Builder().nIn(4).nOut(3)
.weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1))
- .activation(afn).updater(
- Updater.SGD)
+ .activation(afn)
.build())
.layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nIn(3).nOut(3)
.weightInit(WeightInit.DISTRIBUTION)
- .dist(new NormalDistribution(0, 1)).updater(Updater.SGD).build())
+ .dist(new NormalDistribution(0, 1)).build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
@@ -128,11 +127,11 @@ public void testGradientMLP2LayerIrisL1L2Simple() {
//As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
//Need to run gradient through updater, so that L2 can be applied
- String[] activFns = {"sigmoid", "tanh"};
+ Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
- String[] outputActivations = {"softmax", "tanh"}; //i.e., lossFunctions[i] used with outputActivations[i] here
+ Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
DataNormalization scaler = new NormalizerMinMaxScaler();
DataSetIterator iter = new IrisDataSetIterator(150, 150);
@@ -149,17 +148,17 @@ public void testGradientMLP2LayerIrisL1L2Simple() {
double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
- for (String afn : activFns) {
+ for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunction lf = lossFunctions[i];
- String outputActivation = outputActivations[i];
+ Activation outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().regularization(true).l2(l2).l1(l1)
+ new NeuralNetConfiguration.Builder().l2(l2).l1(l1)
.l2Bias(biasL2[k]).l1Bias(biasL1[k])
.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
.seed(12345L)
@@ -168,12 +167,12 @@ public void testGradientMLP2LayerIrisL1L2Simple() {
.weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0,
1))
- .updater(Updater.NONE)
+ .updater(new NoOp())
.activation(afn).build())
.layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
.weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE)
+ .updater(new NoOp())
.activation(outputActivation).build())
.pretrain(false).backprop(true).build();
@@ -230,17 +229,17 @@ public void testEmbeddingLayerSimple() {
labels.putScalar(new int[] {i, r.nextInt(3)}, 1.0);
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.1)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.1)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345L)
.list().layer(0,
new EmbeddingLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER)
.dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).activation(
+ .updater(new NoOp()).activation(
Activation.TANH)
.build())
.layer(1, new OutputLayer.Builder(LossFunction.MCXENT).nIn(3).nOut(3)
.weightInit(WeightInit.XAVIER).dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).activation(Activation.SOFTMAX).build())
+ .updater(new NoOp()).activation(Activation.SOFTMAX).build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
@@ -268,7 +267,7 @@ public void testRbm() {
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
LossFunction[] lossFunctions = {LossFunction.MSE, LossFunction.KL_DIVERGENCE};
- String[] outputActivations = {"softmax", "sigmoid"}; //i.e., lossFunctions[i] used with outputActivations[i] here
+ Activation[] outputActivations = {Activation.SOFTMAX, Activation.SIGMOID}; //i.e., lossFunctions[i] used with outputActivations[i] here
DataNormalization scaler = new NormalizerMinMaxScaler();
DataSetIterator iter = new IrisDataSetIterator(150, 150);
@@ -287,21 +286,20 @@ public void testRbm() {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunction lf = lossFunctions[i];
- String outputActivation = outputActivations[i];
+ Activation outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(l2)
- .l1(l1).learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(l2).l1(l1)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
+ .updater(new NoOp())
.seed(12345L)
.list().layer(0,
new RBM.Builder(hidunit, RBM.VisibleUnit.BINARY).nIn(4).nOut(3)
- .weightInit(WeightInit.UNIFORM).updater(
- Updater.SGD)
+ .weightInit(WeightInit.UNIFORM)
.build())
.layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
- .weightInit(WeightInit.XAVIER).updater(Updater.SGD)
+ .weightInit(WeightInit.XAVIER)
.activation(outputActivation).build())
.pretrain(false).backprop(true).build();
@@ -354,11 +352,11 @@ public void testAutoEncoder() {
//As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
//Need to run gradient through updater, so that L2 can be applied
- String[] activFns = {"sigmoid", "tanh"};
+ Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
- String[] outputActivations = {"softmax", "tanh"}; //i.e., lossFunctions[i] used with outputActivations[i] here
+ Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH};
DataNormalization scaler = new NormalizerMinMaxScaler();
DataSetIterator iter = new IrisDataSetIterator(150, 150);
@@ -375,22 +373,23 @@ public void testAutoEncoder() {
double[] l2vals = {0.2, 0.0, 0.2};
double[] l1vals = {0.0, 0.3, 0.3}; //i.e., use l2vals[i] with l1vals[i]
- for (String afn : activFns) {
+ for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunction lf = lossFunctions[i];
- String outputActivation = outputActivations[i];
+ Activation outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().regularization(true).learningRate(1.0)
+ new NeuralNetConfiguration.Builder()
+ .updater(new NoOp())
.l2(l2).l1(l1)
.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
.seed(12345L).weightInit(WeightInit.DISTRIBUTION)
- .dist(new NormalDistribution(0, 1)).updater(Updater.SGD)
+ .dist(new NormalDistribution(0, 1))
.list().layer(0,
new AutoEncoder.Builder().nIn(4).nOut(3)
.activation(afn).build())
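
For context, the NoOp-configured networks above are handed to GradientCheckUtil. A sketch of the typical call, assuming the epsilon and error-tolerance constants these test classes usually define as fields:

```java
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();

// Compares backprop gradients against central-difference numerical gradients.
boolean gradOK = GradientCheckUtil.checkGradients(mln,
        1e-6,    // epsilon used for numerical differentiation
        1e-3,    // maximum acceptable relative error per parameter
        1e-8,    // minimum absolute error, guards the relative test near zero
        true,    // print per-parameter results
        false,   // don't stop at the first failing parameter
        input, labels);
assertTrue(gradOK);
```
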
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java
index 9f2f27a0e363..48184b9f502b 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java
@@ -5,7 +5,6 @@
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.GaussianDistribution;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
@@ -28,6 +27,7 @@
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.Arrays;
@@ -54,8 +54,8 @@ public void testBasicIris() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE)
- .learningRate(1.0).graphBuilder().addInputs("input")
+ .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(new NoOp())
+ .graphBuilder().addInputs("input")
.addLayer("firstLayer",
new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build(),
"input")
@@ -99,8 +99,8 @@ public void testBasicIrisWithMerging() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE)
- .learningRate(1.0).graphBuilder().addInputs("input")
+ .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(new NoOp())
+ .graphBuilder().addInputs("input")
.addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build(),
"input")
.addLayer("l2", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build(),
@@ -156,7 +156,7 @@ public void testBasicIrisWithElementWiseNode() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).learningRate(1.0).graphBuilder().addInputs("input")
+ .updater(new NoOp()).graphBuilder().addInputs("input")
.addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build(),
"input")
.addLayer("l2", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.SIGMOID)
@@ -213,7 +213,7 @@ public void testBasicIrisWithElementWiseNodeInputSizeGreaterThanTwo() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).learningRate(1.0).graphBuilder().addInputs("input")
+ .updater(new NoOp()).graphBuilder().addInputs("input")
.addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build(),
"input")
.addLayer("l2", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.SIGMOID)
@@ -266,8 +266,8 @@ public void testCnnDepthMerge() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.1)).updater(Updater.NONE)
- .learningRate(1.0).graphBuilder().addInputs("input")
+ .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.1))
+ .updater(new NoOp()).graphBuilder().addInputs("input")
.addLayer("l1", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0)
.nIn(2).nOut(2).activation(Activation.TANH).build(), "input")
.addLayer("l2", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0)
@@ -313,7 +313,7 @@ public void testLSTMWithMerging() {
new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0.2, 0.6))
- .updater(Updater.NONE).learningRate(1.0).graphBuilder().addInputs("input")
+ .updater(new NoOp()).graphBuilder().addInputs("input")
.setOutputs("out")
.addLayer("lstm1",
new GravesLSTM.Builder().nIn(3).nOut(4)
@@ -371,8 +371,8 @@ public void testLSTMWithSubset() {
Nd4j.getRandom().setSeed(1234);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(1234)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE)
- .learningRate(1.0).graphBuilder().addInputs("input").setOutputs("out")
+ .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
+ .updater(new NoOp()).graphBuilder().addInputs("input").setOutputs("out")
.addLayer("lstm1", new GravesLSTM.Builder().nIn(3).nOut(8).activation(Activation.TANH).build(),
"input")
.addVertex("subset", new SubsetVertex(0, 3), "lstm1")
@@ -412,8 +412,8 @@ public void testLSTMWithLastTimeStepVertex() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE)
- .learningRate(1.0).graphBuilder().addInputs("input").setOutputs("out")
+ .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
+ .updater(new NoOp()).graphBuilder().addInputs("input").setOutputs("out")
.addLayer("lstm1", new GravesLSTM.Builder().nIn(3).nOut(4).activation(Activation.TANH).build(),
"input")
.addVertex("lastTS", new LastTimeStepVertex("input"), "lstm1")
@@ -465,7 +465,7 @@ public void testLSTMWithDuplicateToTimeSeries() {
new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).learningRate(1.0).graphBuilder()
+ .updater(new NoOp()).graphBuilder()
.addInputs("input1", "input2").setOutputs("out")
.addLayer("lstm1",
new GravesLSTM.Builder().nIn(3).nOut(4)
@@ -517,8 +517,8 @@ public void testMultipleInputsLayer() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE)
- .learningRate(1.0).activation(Activation.TANH).graphBuilder().addInputs("i0", "i1", "i2")
+ .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
+ .updater(new NoOp()).activation(Activation.TANH).graphBuilder().addInputs("i0", "i1", "i2")
.addLayer("d0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i0")
.addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i1")
.addLayer("d2", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i2")
@@ -559,8 +559,8 @@ public void testMultipleOutputsLayer() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE)
- .learningRate(1.0).activation(Activation.TANH).graphBuilder().addInputs("i0")
+ .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
+ .updater(new NoOp()).activation(Activation.TANH).graphBuilder().addInputs("i0")
.addLayer("d0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i0")
.addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "d0")
.addLayer("d2", new DenseLayer.Builder().nIn(2).nOut(2).build(), "d0")
@@ -598,8 +598,8 @@ public void testMultipleOutputsMergeVertex() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE)
- .learningRate(1.0).activation(Activation.TANH).graphBuilder().addInputs("i0", "i1", "i2")
+ .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
+ .updater(new NoOp()).activation(Activation.TANH).graphBuilder().addInputs("i0", "i1", "i2")
.addLayer("d0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i0")
.addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i1")
.addLayer("d2", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i2")
@@ -645,8 +645,8 @@ public void testMultipleOutputsMergeCnn() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE)
- .learningRate(1.0).activation(Activation.TANH).graphBuilder().addInputs("input")
+ .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
+ .updater(new NoOp()).activation(Activation.TANH).graphBuilder().addInputs("input")
.addLayer("l0", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0)
.nIn(2).nOut(2).activation(Activation.TANH).build(), "input")
.addLayer("l1", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0)
@@ -694,7 +694,7 @@ public void testBasicIrisTripletStackingL2Loss() {
new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).learningRate(1.0).graphBuilder()
+ .updater(new NoOp()).graphBuilder()
.addInputs("input1", "input2", "input3")
.addVertex("stack1", new StackVertex(), "input1", "input2", "input3")
.addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5)
@@ -767,7 +767,7 @@ public void testBasicCenterLoss() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new GaussianDistribution(0, 1))
- .updater(Updater.NONE).learningRate(1.0).graphBuilder().addInputs("input1")
+ .updater(new NoOp()).graphBuilder().addInputs("input1")
.addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH)
.build(), "input1")
.addLayer("cl", new CenterLossOutputLayer.Builder()
@@ -828,8 +828,8 @@ public void testCnnPoolCenterLoss() {
for (boolean train : trainFirst) {
for (double lambda : new double[] {0.0, 0.5, 2.0}) {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false)
- .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(3).build())
.layer(1, new GlobalPoolingLayer.Builder().poolingType(PoolingType.AVG).build())
@@ -883,7 +883,7 @@ public void testBasicL2() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder()
+ .activation(Activation.TANH).updater(new NoOp()).graphBuilder()
.addInputs("in1", "in2").addLayer("d0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "in1")
.addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "in2")
.addVertex("l2", new L2Vertex(), "d0", "d1")
@@ -934,7 +934,7 @@ public void testBasicStackUnstack() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder()
+ .activation(Activation.TANH).updater(new NoOp()).graphBuilder()
.addInputs("in1", "in2")
.addLayer("d0", new DenseLayer.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in1")
.addLayer("d1", new DenseLayer.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in2")
@@ -988,7 +988,7 @@ public void testBasicStackUnstackDebug() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder()
+ .activation(Activation.TANH).updater(new NoOp()).graphBuilder()
.addInputs("in1", "in2").addLayer("d0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "in1")
.addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "in2")
.addVertex("stack", new StackVertex(), "d0", "d1")
@@ -1047,7 +1047,7 @@ public void testBasicStackUnstackVariableLengthTS() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder()
+ .activation(Activation.TANH).updater(new NoOp()).graphBuilder()
.addInputs("in1", "in2")
.addLayer("d0", new GravesLSTM.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in1")
.addLayer("d1", new GravesLSTM.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in2")
@@ -1109,7 +1109,7 @@ public void testBasicTwoOutputs() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder()
+ .activation(Activation.TANH).updater(new NoOp()).graphBuilder()
.addInputs("in1", "in2").addLayer("d0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "in1")
.addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "in2")
.addLayer("out1",
@@ -1164,7 +1164,7 @@ public void testL2NormalizeVertex2d() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder()
+ .activation(Activation.TANH).updater(new NoOp()).graphBuilder()
.addInputs("in1").addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(3).build(), "in1")
.addVertex("norm", new L2NormalizeVertex(), "d1")
.addLayer("out1",
@@ -1210,7 +1210,7 @@ public void testL2NormalizeVertex4d() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder()
+ .activation(Activation.TANH).updater(new NoOp()).graphBuilder()
.addInputs("in1")
.addLayer("d1", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(2).build(),
"in1")
@@ -1258,9 +1258,9 @@ public void testGraphEmbeddingLayerSimple() {
labels.putScalar(new int[] {i, r.nextInt(3)}, 1.0);
}
- ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.1)
+ ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.1)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345L)
- .updater(Updater.NONE).graphBuilder().addInputs("in")
+ .updater(new NoOp()).graphBuilder().addInputs("in")
.addLayer("0", new EmbeddingLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER)
.activation(Activation.TANH).build(), "in")
.addLayer("1", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsMasking.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsMasking.java
index 89663b404f2b..8bee4c8f96c3 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsMasking.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsMasking.java
@@ -3,7 +3,6 @@
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.graph.ComputationGraph;
@@ -15,6 +14,7 @@
import org.nd4j.linalg.api.buffer.util.DataTypeUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.ILossFunction;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.lossfunctions.impl.*;
@@ -105,14 +105,14 @@ public void gradientCheckMaskingOutputSimple() {
maskArr.putScalar(new int[] {0, j}, mask[i][j] ? 1.0 : 0.0);
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).seed(12345L)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L)
.list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.layer(1, new RnnOutputLayer.Builder(s.lf).activation(s.act).nIn(layerSize).nOut(s.nOut)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
@@ -150,8 +150,8 @@ public void testBidirectionalLSTMMasking() {
int testNum = 0;
for (INDArray mask : masks) {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false)
- .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
.layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize)
.activation(Activation.TANH).build())
@@ -246,7 +246,7 @@ public void testPerOutputMaskingMLP() {
Activation a = act[i];
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp())
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345)
.list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
@@ -340,7 +340,7 @@ public void testPerOutputMaskingRnn() {
Activation a = act[i];
Nd4j.getRandom().setSeed(12345);
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp())
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345)
.list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
@@ -372,7 +372,7 @@ public void testPerOutputMaskingRnn() {
//Check the equivalent compgraph:
Nd4j.getRandom().setSeed(12345);
- ComputationGraphConfiguration cg = new NeuralNetConfiguration.Builder().updater(Updater.NONE)
+ ComputationGraphConfiguration cg = new NeuralNetConfiguration.Builder().updater(new NoOp())
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 2)).seed(12345)
.graphBuilder().addInputs("in")
.addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java
index c96f4f3b1065..855c37e3a428 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java
@@ -2,7 +2,6 @@
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
@@ -16,6 +15,7 @@
import org.nd4j.linalg.api.buffer.util.DataTypeUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.Random;
@@ -52,8 +52,8 @@ public void testGradientLRNSimple() {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
- MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0)
- .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
+ .seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new ConvolutionLayer.Builder().nOut(6).kernelSize(2, 2).stride(1, 1)
.activation(Activation.TANH).build())
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java
index 701a674e7ae0..faa2d67ef961 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java
@@ -16,6 +16,7 @@
import org.nd4j.linalg.api.buffer.util.DataTypeUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
import java.util.Random;
@@ -57,27 +58,27 @@ public void testLSTMBasicMultiLayer() {
if (graves) {
l0 = new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.SIGMOID)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0))
- .updater(Updater.NONE).build();
+ .updater(new NoOp()).build();
l1 = new GravesLSTM.Builder().nIn(layerSize).nOut(layerSize).activation(Activation.SIGMOID)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0))
- .updater(Updater.NONE).build();
+ .updater(new NoOp()).build();
} else {
l0 = new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.SIGMOID)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0))
- .updater(Updater.NONE).build();
+ .updater(new NoOp()).build();
l1 = new LSTM.Builder().nIn(layerSize).nOut(layerSize).activation(Activation.SIGMOID)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0))
- .updater(Updater.NONE).build();
+ .updater(new NoOp()).build();
}
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().regularization(false).seed(12345L).list()
+ new NeuralNetConfiguration.Builder().seed(12345L).list()
.layer(0, l0).layer(1,
l1)
.layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut)
.weightInit(WeightInit.DISTRIBUTION)
- .dist(new NormalDistribution(0, 1.0)).updater(Updater.NONE)
+ .dist(new NormalDistribution(0, 1.0)).updater(new NoOp())
.build())
.pretrain(false).backprop(true).build();
@@ -166,9 +167,9 @@ public void testGradientLSTMFull() {
double l1 = l1vals[k];
NeuralNetConfiguration.Builder conf =
- new NeuralNetConfiguration.Builder().regularization(l1 > 0.0 || l2 > 0.0)
+ new NeuralNetConfiguration.Builder()
.seed(12345L).weightInit(WeightInit.DISTRIBUTION)
- .dist(new NormalDistribution(0, 1)).updater(Updater.NONE);
+ .dist(new NormalDistribution(0, 1)).updater(new NoOp());
if (l1 > 0.0)
conf.l1(l1);
@@ -255,9 +256,9 @@ public void testGradientLSTMEdgeCases() {
layer = new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build();
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).regularization(false)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).list().layer(0, layer)
+ .updater(new NoOp()).list().layer(0, layer)
.layer(1, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX)
.nIn(layerSize).nOut(nOut).build())
.pretrain(false).backprop(true).build();
@@ -321,7 +322,7 @@ public void testGradientGravesBidirectionalLSTMFull() {
double l1 = l1vals[k];
NeuralNetConfiguration.Builder conf =
- new NeuralNetConfiguration.Builder().regularization(l1 > 0.0 || l2 > 0.0);
+ new NeuralNetConfiguration.Builder();
if (l1 > 0.0)
conf.l1(l1);
if (l2 > 0.0)
@@ -341,7 +342,7 @@ public void testGradientGravesBidirectionalLSTMFull() {
.build())
.layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation).nIn(layerSize)
.nOut(nOut).weightInit(WeightInit.DISTRIBUTION)
- .dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build())
+ .dist(new NormalDistribution(0, 1)).updater(new NoOp()).build())
.pretrain(false).backprop(true).build();
@@ -398,7 +399,7 @@ public void testGradientGravesBidirectionalLSTMEdgeCases() {
}
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).seed(12345L)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L)
.list()
.layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize)
.weightInit(WeightInit.DISTRIBUTION)
@@ -407,7 +408,7 @@ public void testGradientGravesBidirectionalLSTMEdgeCases() {
.build())
.layer(1, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX)
.nIn(layerSize).nOut(nOut).weightInit(WeightInit.DISTRIBUTION)
- .dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build())
+ .dist(new NormalDistribution(0, 1)).updater(new NoOp()).build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
@@ -445,7 +446,7 @@ public void testGradientCnnFfRnn() {
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE).seed(12345)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()).seed(12345)
.weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2)).list()
.layer(0, new ConvolutionLayer.Builder(5, 5).nIn(3).nOut(5).stride(1, 1)
.activation(Activation.TANH).build()) //Out: (10-5)/1+1 = 6 -> 6x6x5
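Note: alongside the updater change, the boolean regularization(...) toggle disappears from the builder API: a non-zero l1(...) or l2(...) coefficient is now enough to enable the corresponding penalty, so guards like regularization(l1 > 0.0 || l2 > 0.0) become redundant and are simply dropped. A sketch of the conditional idiom the LSTM tests keep (coefficients illustrative, mirroring the l1vals/l2vals arrays):

    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
    import org.deeplearning4j.nn.weights.WeightInit;
    import org.nd4j.linalg.learning.config.NoOp;

    public class RegularizationFlagSketch {
        public static void main(String[] args) {
            double l1 = 0.0, l2 = 0.4; // illustrative values

            // Old: .regularization(l1 > 0.0 || l2 > 0.0) plus the l1/l2 setters.
            // New: setting a non-zero coefficient alone switches the penalty on.
            NeuralNetConfiguration.Builder conf = new NeuralNetConfiguration.Builder()
                    .seed(12345L)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                    .updater(new NoOp());
            if (l1 > 0.0)
                conf.l1(l1);
            if (l2 > 0.0)
                conf.l2(l2);
        }
    }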
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java
index a61249e2f860..c30013991e9e 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java
@@ -4,7 +4,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LossLayer;
@@ -21,6 +20,7 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.BooleanIndexing;
import org.nd4j.linalg.indexing.conditions.Conditions;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.ILossFunction;
import org.nd4j.linalg.lossfunctions.impl.*;
import org.nd4j.shade.jackson.databind.ObjectMapper;
@@ -62,36 +62,36 @@ public void lossFunctionGradientCheck() {
LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(),
LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(),};
- String[] outputActivationFn = new String[] {"sigmoid", //xent
- "sigmoid", //xent
- "tanh", //cosine
- "tanh", //hinge -> trying to predict 1 or -1
- "sigmoid", //kld -> probab so should be between 0 and 1
- "softmax", //kld + softmax
- "tanh", //l1
- "rationaltanh", //l1
- "softmax", //l1 + softmax
- "tanh", //l2
- "softmax", //l2 + softmax
- "identity", //mae
- "softmax", //mae + softmax
- "identity", //mape
- "softmax", //mape + softmax
- "softmax", //mcxent
- "identity", //mse
- "softmax", //mse + softmax
- "sigmoid", //msle - requires positive labels/activations due to log
- "softmax", //msle + softmax
- "sigmoid", //nll
- "softmax", //nll + softmax
- "sigmoid", //poisson - requires positive predictions due to log... not sure if this is the best option
- "tanh", //squared hinge
- "sigmoid", //f-measure (binary, single sigmoid output)
- "sigmoid", //f-measure (binary, single sigmoid output)
- "softmax", //f-measure (binary, 2-label softmax output)
- "softmax", //f-measure (binary, 2-label softmax output)
- "identity", // MixtureDensity
- "tanh", // MixtureDensity + tanh
+ Activation[] outputActivationFn = new Activation[] {Activation.SIGMOID, //xent
+ Activation.SIGMOID, //xent
+ Activation.TANH, //cosine
+ Activation.TANH, //hinge -> trying to predict 1 or -1
+ Activation.SIGMOID, //kld -> probab so should be between 0 and 1
+ Activation.SOFTMAX, //kld + softmax
+ Activation.TANH, //l1
+ Activation.RATIONALTANH, //l1
+ Activation.SOFTMAX, //l1 + softmax
+ Activation.TANH, //l2
+ Activation.SOFTMAX, //l2 + softmax
+ Activation.IDENTITY, //mae
+ Activation.SOFTMAX, //mae + softmax
+ Activation.IDENTITY, //mape
+ Activation.SOFTMAX, //mape + softmax
+ Activation.SOFTMAX, //mcxent
+ Activation.IDENTITY, //mse
+ Activation.SOFTMAX, //mse + softmax
+ Activation.SIGMOID, //msle - requires positive labels/activations due to log
+ Activation.SOFTMAX, //msle + softmax
+ Activation.SIGMOID, //nll
+ Activation.SOFTMAX, //nll + softmax
+ Activation.SIGMOID, //poisson - requires positive predictions due to log... not sure if this is the best option
+ Activation.TANH, //squared hinge
+ Activation.SIGMOID, //f-measure (binary, single sigmoid output)
+ Activation.SIGMOID, //f-measure (binary, single sigmoid output)
+ Activation.SOFTMAX, //f-measure (binary, 2-label softmax output)
+ Activation.SOFTMAX, //f-measure (binary, 2-label softmax output)
+ Activation.IDENTITY, // MixtureDensity
+ Activation.TANH, // MixtureDensity + tanh
};
int[] nOut = new int[] {1, //xent
@@ -140,7 +140,7 @@ public void lossFunctionGradientCheck() {
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345)
- .updater(Updater.NONE).regularization(false).weightInit(WeightInit.DISTRIBUTION)
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-2, 2)).list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
.layer(1, new OutputLayer.Builder().lossFunction(lossFunctions[i])
@@ -205,35 +205,35 @@ public void lossFunctionGradientCheckLossLayer() {
new LossFMeasure(2.0), LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(),
LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(),};
- String[] outputActivationFn = new String[] {"sigmoid", //xent
- "sigmoid", //xent
- "tanh", //cosine
- "tanh", //hinge -> trying to predict 1 or -1
- "sigmoid", //kld -> probab so should be between 0 and 1
- "softmax", //kld + softmax
- "tanh", //l1
- "softmax", //l1 + softmax
- "tanh", //l2
- "softmax", //l2 + softmax
- "identity", //mae
- "softmax", //mae + softmax
- "identity", //mape
- "softmax", //mape + softmax
- "softmax", //mcxent
- "identity", //mse
- "softmax", //mse + softmax
- "sigmoid", //msle - requires positive labels/activations due to log
- "softmax", //msle + softmax
- "sigmoid", //nll
- "softmax", //nll + softmax
- "sigmoid", //poisson - requires positive predictions due to log... not sure if this is the best option
- "tanh", //squared hinge
- "sigmoid", //f-measure (binary, single sigmoid output)
- "sigmoid", //f-measure (binary, single sigmoid output)
- "softmax", //f-measure (binary, 2-label softmax output)
- "softmax", //f-measure (binary, 2-label softmax output)
- "identity", // MixtureDensity
- "tanh", // MixtureDensity + tanh
+ Activation[] outputActivationFn = new Activation[] {Activation.SIGMOID, //xent
+ Activation.SIGMOID, //xent
+ Activation.TANH, //cosine
+ Activation.TANH, //hinge -> trying to predict 1 or -1
+ Activation.SIGMOID, //kld -> probab so should be between 0 and 1
+ Activation.SOFTMAX, //kld + softmax
+ Activation.TANH, //l1
+ Activation.SOFTMAX, //l1 + softmax
+ Activation.TANH, //l2
+ Activation.SOFTMAX, //l2 + softmax
+ Activation.IDENTITY, //mae
+ Activation.SOFTMAX, //mae + softmax
+ Activation.IDENTITY, //mape
+ Activation.SOFTMAX, //mape + softmax
+ Activation.SOFTMAX, //mcxent
+ Activation.IDENTITY, //mse
+ Activation.SOFTMAX, //mse + softmax
+ Activation.SIGMOID, //msle - requires positive labels/activations due to log
+ Activation.SOFTMAX, //msle + softmax
+ Activation.SIGMOID, //nll
+ Activation.SOFTMAX, //nll + softmax
+ Activation.SIGMOID, //poisson - requires positive predictions due to log... not sure if this is the best option
+ Activation.TANH, //squared hinge
+ Activation.SIGMOID, //f-measure (binary, single sigmoid output)
+ Activation.SIGMOID, //f-measure (binary, single sigmoid output)
+ Activation.SOFTMAX, //f-measure (binary, 2-label softmax output)
+ Activation.SOFTMAX, //f-measure (binary, 2-label softmax output)
+ Activation.IDENTITY, // MixtureDensity
+ Activation.TANH, // MixtureDensity + tanh
};
int[] nOut = new int[] {1, //xent
@@ -294,7 +294,7 @@ public void lossFunctionGradientCheckLossLayer() {
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345)
- .updater(Updater.NONE).regularization(false).weightInit(WeightInit.DISTRIBUTION)
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-2, 2)).list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH)
.build())
@@ -485,22 +485,22 @@ public void lossFunctionWeightedGradientCheck() {
new LossMAPE(w), new LossMCXENT(w), new LossMSE(w), new LossMSE(w), new LossMSLE(w),
new LossMSLE(w), new LossNegativeLogLikelihood(w), new LossNegativeLogLikelihood(w),};
- String[] outputActivationFn = new String[] {"sigmoid", //xent
- "tanh", //l1
- "softmax", //l1 + softmax
- "tanh", //l2
- "softmax", //l2 + softmax
- "identity", //mae
- "softmax", //mae + softmax
- "identity", //mape
- "softmax", //mape + softmax
- "softmax", //mcxent
- "identity", //mse
- "softmax", //mse + softmax
- "sigmoid", //msle - requires positive labels/activations due to log
- "softmax", //msle + softmax
- "sigmoid", //nll
- "softmax", //nll + softmax
+ Activation[] outputActivationFn = new Activation[] {Activation.SIGMOID, //xent
+ Activation.TANH, //l1
+ Activation.SOFTMAX, //l1 + softmax
+ Activation.TANH, //l2
+ Activation.SOFTMAX, //l2 + softmax
+ Activation.IDENTITY, //mae
+ Activation.SOFTMAX, //mae + softmax
+ Activation.IDENTITY, //mape
+ Activation.SOFTMAX, //mape + softmax
+ Activation.SOFTMAX, //mcxent
+ Activation.IDENTITY, //mse
+ Activation.SOFTMAX, //mse + softmax
+ Activation.SIGMOID, //msle - requires positive labels/activations due to log
+ Activation.SOFTMAX, //msle + softmax
+ Activation.SIGMOID, //nll
+ Activation.SOFTMAX, //nll + softmax
};
int[] minibatchSizes = new int[] {1, 3};
@@ -513,7 +513,7 @@ public void lossFunctionWeightedGradientCheck() {
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345)
- .updater(Updater.NONE).regularization(false).weightInit(WeightInit.DISTRIBUTION)
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-3, 3)).list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH)
.build())
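Note: the other recurring change in this file replaces stringly-typed activation names ("sigmoid", "tanh", ...) with the nd4j Activation enum, so a misspelled activation is a compile error rather than a runtime failure. A small sketch of the correspondence; getActivationFunction() here is the enum's accessor for the underlying IActivation instance:

    import org.nd4j.linalg.activations.Activation;

    public class ActivationEnumSketch {
        public static void main(String[] args) {
            // Old: String[] outputActivationFn = {"sigmoid", "softmax", "identity"};
            // New: enum constants; typos no longer survive until runtime.
            Activation[] outputActivationFn = {
                    Activation.SIGMOID,  // e.g. XENT-style losses
                    Activation.SOFTMAX,  // e.g. MCXENT
                    Activation.IDENTITY  // e.g. MSE/MAE regression losses
            };
            for (Activation a : outputActivationFn) {
                // Each constant can still yield the underlying IActivation object.
                System.out.println(a + " -> " + a.getActivationFunction());
            }
        }
    }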
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java
index b3a91e7c5cc3..083e70fe51da 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java
@@ -1,27 +1,23 @@
package org.deeplearning4j.gradientcheck;
+import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
-import org.deeplearning4j.util.ModelSerializer;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.buffer.util.DataTypeUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -38,21 +34,6 @@ public class NoBiasGradientCheckTests {
DataTypeUtil.setDTypeForContext(DataBuffer.Type.DOUBLE);
}
- private static void checkSerialization(MultiLayerNetwork net){
- try {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ModelSerializer.writeModel(net, baos, true);
- byte[] bytes = baos.toByteArray();
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- MultiLayerNetwork net2 = ModelSerializer.restoreMultiLayerNetwork(bais, true);
- assertEquals(net.getLayerWiseConfigurations().toJson(), net2.getLayerWiseConfigurations().toJson());
- assertEquals(net.params(), net2.params());
- } catch (IOException e ){
- throw new RuntimeException(e);
- }
-
- }
-
@Test
public void testGradientNoBiasDenseOutput() {
@@ -70,8 +51,8 @@ public void testGradientNoBiasDenseOutput() {
for (boolean denseHasBias : new boolean[]{true, false}) {
for (boolean outHasBias : new boolean[]{true, false}) {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false)
- .updater(Updater.NONE)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp())
.seed(12345L)
.list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize)
@@ -120,7 +101,7 @@ public void testGradientNoBiasDenseOutput() {
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
- checkSerialization(mln);
+ TestUtils.testModelSerialization(mln);
}
}
}
@@ -142,8 +123,8 @@ public void testGradientNoBiasRnnOutput() {
for (boolean rnnOutHasBias : new boolean[]{true, false}) {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false)
- .updater(Updater.NONE)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp())
.seed(12345L)
.list()
.layer(0, new LSTM.Builder().nIn(nIn).nOut(layerSize)
@@ -178,7 +159,7 @@ public void testGradientNoBiasRnnOutput() {
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
- checkSerialization(mln);
+ TestUtils.testModelSerialization(mln);
}
}
}
@@ -202,8 +183,8 @@ public void testGradientNoBiasEmbedding() {
for (boolean embeddingHasBias : new boolean[]{true, false}) {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false)
- .updater(Updater.NONE)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp())
.seed(12345L)
.list()
.layer(0, new EmbeddingLayer.Builder().nIn(nIn).nOut(layerSize)
@@ -239,7 +220,7 @@ public void testGradientNoBiasEmbedding() {
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
- checkSerialization(mln);
+ TestUtils.testModelSerialization(mln);
}
}
}
@@ -268,8 +249,8 @@ public void testCnnWithSubsamplingNoBias() {
for(boolean cnnHasBias : new boolean[]{true, false}) {
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(1.0)
- .updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION)
+ new NeuralNetConfiguration.Builder().updater(new NoOp())
+ .weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1))
.list()
.layer(new ConvolutionLayer.Builder(kernel,
@@ -305,7 +286,7 @@ public void testCnnWithSubsamplingNoBias() {
assertTrue(msg, gradOK);
- checkSerialization(net);
+ TestUtils.testModelSerialization(net);
}
}
}
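Note: the per-class checkSerialization helper deleted above moves into a shared TestUtils.testModelSerialization(...). TestUtils itself is not part of this diff; judging from the deleted helper, its shape is presumably the same round trip, along these lines:

    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.deeplearning4j.util.ModelSerializer;

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import static org.junit.Assert.assertEquals;

    public class SerializationRoundTripSketch {
        // Assumed shape of TestUtils.testModelSerialization: write the model to
        // bytes, restore it, and compare configuration JSON and parameters.
        public static void testModelSerialization(MultiLayerNetwork net) {
            try {
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                ModelSerializer.writeModel(net, baos, true);
                MultiLayerNetwork net2 = ModelSerializer.restoreMultiLayerNetwork(
                        new ByteArrayInputStream(baos.toByteArray()), true);
                assertEquals(net.getLayerWiseConfigurations().toJson(),
                        net2.getLayerWiseConfigurations().toJson());
                assertEquals(net.params(), net2.params());
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }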
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java
index 5873a9ba506e..be8f735f2a51 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java
@@ -1,9 +1,7 @@
package org.deeplearning4j.gradientcheck;
-import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.variational.*;
@@ -19,6 +17,7 @@
import org.nd4j.linalg.api.ops.random.impl.BernoulliDistribution;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
import org.nd4j.linalg.lossfunctions.impl.LossMAE;
import org.nd4j.linalg.lossfunctions.impl.LossMSE;
@@ -48,10 +47,10 @@ public void testVaeAsMLP() {
//Post pre-training: a VAE can be used as a MLP, by taking the mean value from p(z|x) as the output
//This gradient check tests this part
- String[] activFns = {"identity", "tanh"}; //activation functions such as relu and hardtanh: may randomly fail due to discontinuities
+ Activation[] activFns = {Activation.IDENTITY, Activation.TANH}; //activation functions such as relu and hardtanh: may randomly fail due to discontinuities
LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
- String[] outputActivations = {"softmax", "tanh"}; //i.e., lossFunctions[i] used with outputActivations[i] here
+ Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
//use l2vals[i] with l1vals[i]
double[] l2vals = {0.4, 0.0, 0.4, 0.4};
@@ -74,33 +73,31 @@ public void testVaeAsMLP() {
int[] encoderSizes = encoderLayerSizes[ls];
int[] decoderSizes = decoderLayerSizes[ls];
- for (String afn : activFns) {
+ for (Activation afn : activFns) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunction lf = lossFunctions[i];
- String outputActivation = outputActivations[i];
+ Activation outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().regularization(true).l2(l2).l1(l1)
+ new NeuralNetConfiguration.Builder().l2(l2).l1(l1)
.l2Bias(biasL2[k]).l1Bias(biasL1[k])
- .optimizationAlgo(
- OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .learningRate(1.0).seed(12345L).list()
+ .updater(new NoOp()).seed(12345L).list()
.layer(0, new VariationalAutoencoder.Builder().nIn(4)
.nOut(3).encoderLayerSizes(encoderSizes)
.decoderLayerSizes(decoderSizes)
.weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1))
- .activation(afn).updater(
- Updater.SGD)
+ .activation(afn)
.build())
.layer(1, new OutputLayer.Builder(lf)
.activation(outputActivation).nIn(3).nOut(3)
.weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1))
- .updater(Updater.SGD).build())
+ .build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
@@ -131,9 +128,9 @@ public void testVaeAsMLP() {
@Test
public void testVaePretrain() {
- String[] activFns = {"identity", "identity", "tanh", "tanh"}; //activation functions such as relu and hardtanh: may randomly fail due to discontinuities
- String[] pzxAfns = {"identity", "tanh", "identity", "tanh"};
- String[] pxzAfns = {"tanh", "identity", "tanh", "identity"};
+ Activation[] activFns = {Activation.IDENTITY, Activation.IDENTITY, Activation.TANH, Activation.TANH}; //activation functions such as relu and hardtanh: may randomly fail due to discontinuities
+ Activation[] pzxAfns = {Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH};
+ Activation[] pxzAfns = {Activation.TANH, Activation.IDENTITY, Activation.TANH, Activation.IDENTITY};
//use l2vals[i] with l1vals[i]
double[] l2vals = {0.4, 0.0, 0.4, 0.4};
@@ -153,22 +150,21 @@ public void testVaePretrain() {
int[] decoderSizes = decoderLayerSizes[ls];
for (int j = 0; j < activFns.length; j++) {
- String afn = activFns[j];
- String pzxAfn = pzxAfns[j];
- String pxzAfn = pxzAfns[j];
+ Activation afn = activFns[j];
+ Activation pzxAfn = pzxAfns[j];
+ Activation pxzAfn = pxzAfns[j];
double l2 = l2vals[j]; //Ideally we'd do the cartesian product of l1/l2 and the activation functions, but that takes too long...
double l1 = l1vals[j];
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(l2)
- .l1(l1).l2Bias(biasL2[j]).l1Bias(biasL1[j])
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .learningRate(1.0).seed(12345L).weightInit(WeightInit.XAVIER).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(l2)
+ .l1(l1).l2Bias(biasL2[j]).l1Bias(biasL1[j]).updater(new NoOp())
+ .seed(12345L).weightInit(WeightInit.XAVIER).list()
.layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3)
.encoderLayerSizes(encoderSizes).decoderLayerSizes(decoderSizes)
.pzxActivationFunction(pzxAfn)
.reconstructionDistribution(
new GaussianReconstructionDistribution(pxzAfn))
- .activation(afn).updater(Updater.SGD).build())
+ .activation(afn).build())
.pretrain(true).backprop(false).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
@@ -255,8 +251,8 @@ public void testVaePretrainReconstructionDistributions() {
throw new RuntimeException();
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.3)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3)
+ .updater(new NoOp())
.seed(12345L).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
.list().layer(0,
new VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3)
@@ -264,7 +260,7 @@ public void testVaePretrainReconstructionDistributions() {
.pzxActivationFunction(Activation.TANH)
.reconstructionDistribution(
reconstructionDistributions[i])
- .activation(Activation.TANH).updater(Updater.SGD)
+ .activation(Activation.TANH)
.build())
.pretrain(true).backprop(false).build();
@@ -299,14 +295,14 @@ public void testVaePretrainMultipleSamples() {
// for (int numSamples : new int[]{10}) {
INDArray features = Nd4j.rand(minibatch, 4);
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.3)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3)
+ .updater(new NoOp())
.seed(12345L).weightInit(WeightInit.XAVIER).list()
.layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(5, 6)
.decoderLayerSizes(7, 8).pzxActivationFunction(Activation.TANH)
.reconstructionDistribution(
new GaussianReconstructionDistribution(Activation.TANH))
- .numSamples(numSamples).activation(Activation.TANH).updater(Updater.SGD)
+ .numSamples(numSamples).activation(Activation.TANH)
.build())
.pretrain(true).backprop(false).build();
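Note: the VAE tests get the same substitutions, and two VAE-specific settings change type as well: pzxActivationFunction(...) and the GaussianReconstructionDistribution constructor are now fed Activation constants instead of strings. A compact sketch of the migrated layer configuration (sizes and coefficients illustrative):

    import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.variational.GaussianReconstructionDistribution;
    import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.learning.config.NoOp;

    public class VaeConfigSketch {
        public static void main(String[] args) {
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .l2(0.2).l1(0.3)
                    .updater(new NoOp())
                    .seed(12345L)
                    .list()
                    .layer(0, new VariationalAutoencoder.Builder()
                            .nIn(4).nOut(3)
                            .encoderLayerSizes(5, 6).decoderLayerSizes(7, 8)
                            .pzxActivationFunction(Activation.TANH) // enum, not "tanh"
                            .reconstructionDistribution(
                                    new GaussianReconstructionDistribution(Activation.TANH))
                            .activation(Activation.TANH)
                            .build())
                    .pretrain(true).backprop(false)
                    .build();
            System.out.println(conf.toJson());
        }
    }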
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java
index d250e95b7b59..4638e8a7def5 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java
@@ -3,8 +3,7 @@
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
-import org.deeplearning4j.nn.conf.layers.*;
+import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Test;
@@ -13,8 +12,9 @@
import org.nd4j.linalg.api.buffer.util.DataTypeUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertTrue;
/**
* @author Alex Black
@@ -64,7 +64,7 @@ public void testYoloOutputLayer() {
INDArray labels = yoloLabels(mb, c, h, w);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
- .learningRate(1.0).updater(Updater.SGD)
+ .updater(new NoOp())
.activation(a)
.l1(l1[i]).l2(l2[i])
.convolutionMode(ConvolutionMode.Same)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java
index 04d64aac5543..51c7dbae0192 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java
@@ -7,10 +7,16 @@
import lombok.NoArgsConstructor;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
-import org.deeplearning4j.nn.conf.graph.*;
+import org.deeplearning4j.nn.conf.graph.ElementWiseVertex;
+import org.deeplearning4j.nn.conf.graph.GraphVertex;
+import org.deeplearning4j.nn.conf.graph.MergeVertex;
+import org.deeplearning4j.nn.conf.graph.SubsetVertex;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.inputs.InvalidInputTypeException;
-import org.deeplearning4j.nn.conf.layers.*;
+import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
+import org.deeplearning4j.nn.conf.layers.DenseLayer;
+import org.deeplearning4j.nn.conf.layers.OutputLayer;
+import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
import org.deeplearning4j.nn.conf.memory.MemoryReport;
import org.deeplearning4j.nn.conf.misc.TestGraphVertex;
import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor;
@@ -20,6 +26,7 @@
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import static org.junit.Assert.assertEquals;
@@ -31,8 +38,8 @@ public class ComputationGraphConfigurationTest {
public void testJSONBasic() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE)
- .learningRate(1.0).graphBuilder().addInputs("input")
+ .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(new NoOp())
+ .graphBuilder().addInputs("input")
.addLayer("firstLayer",
new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build(),
"input")
@@ -231,43 +238,6 @@ public void testOutputOrderDoesntChangeWhenCloning() {
assertEquals(json, jsonCloned);
}
- @Test
- public void testBiasLr() {
- //setup the network
- ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).learningRate(1e-2)
- .biasLearningRate(0.5).graphBuilder().addInputs("in")
- .addLayer("0", new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5)
- .weightInit(WeightInit.XAVIER).activation(Activation.RELU).build(), "in")
- .addLayer("1", new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build(), "0")
- .addLayer("2", new DenseLayer.Builder().nOut(100).activation(Activation.RELU)
- .biasLearningRate(0.25).build(), "1")
- .addLayer("3", new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
- .nOut(10).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build(),
- "2")
- .setOutputs("3").setInputTypes(InputType.convolutional(28, 28, 1)).build();
-
- org.deeplearning4j.nn.conf.layers.BaseLayer l0 =
- (BaseLayer) ((LayerVertex) conf.getVertices().get("0")).getLayerConf().getLayer();
- org.deeplearning4j.nn.conf.layers.BaseLayer l1 =
- (BaseLayer) ((LayerVertex) conf.getVertices().get("1")).getLayerConf().getLayer();
- org.deeplearning4j.nn.conf.layers.BaseLayer l2 =
- (BaseLayer) ((LayerVertex) conf.getVertices().get("2")).getLayerConf().getLayer();
- org.deeplearning4j.nn.conf.layers.BaseLayer l3 =
- (BaseLayer) ((LayerVertex) conf.getVertices().get("3")).getLayerConf().getLayer();
-
- assertEquals(0.5, l0.getBiasLearningRate(), 1e-6);
- assertEquals(1e-2, l0.getLearningRate(), 1e-6);
-
- assertEquals(0.5, l1.getBiasLearningRate(), 1e-6);
- assertEquals(1e-2, l1.getLearningRate(), 1e-6);
-
- assertEquals(0.25, l2.getBiasLearningRate(), 1e-6);
- assertEquals(1e-2, l2.getLearningRate(), 1e-6);
-
- assertEquals(0.5, l3.getBiasLearningRate(), 1e-6);
- assertEquals(1e-2, l3.getLearningRate(), 1e-6);
- }
-
@AllArgsConstructor
@NoArgsConstructor
@Data
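Note: with learningRate(...) and biasLearningRate(...) removed from the configuration API, the graph version of testBiasLr goes away entirely; the MultiLayerConfiguration variant further down is rewritten against the replacement, where learning rates live inside IUpdater instances and a separate bias updater is set through biasUpdater(...). A sketch of the replacement pattern (rates and sizes illustrative):

    import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.learning.config.Adam;

    public class BiasUpdaterSketch {
        public static void main(String[] args) {
            // was: .learningRate(1e-2).biasLearningRate(0.5).updater(Updater.ADAM)
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .seed(12345)
                    .updater(new Adam(1e-2))    // the weight learning rate lives in the updater
                    .biasUpdater(new Adam(0.5)) // a separate updater (and rate) for biases
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(10).nOut(10)
                            .activation(Activation.RELU).build())
                    .build();
            System.out.println(conf.toJson());
        }
    }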
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java
index b396a58ebd69..b2c92af134af 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java
@@ -24,31 +24,31 @@ public void testJsonLossFunctions() {
new LossNegativeLogLikelihood(), new LossNegativeLogLikelihood(), new LossPoisson(),
new LossSquaredHinge(), new LossFMeasure(), new LossFMeasure(2.0)};
- String[] outputActivationFn = new String[] {"sigmoid", //xent
- "sigmoid", //xent
- "tanh", //cosine
- "tanh", //hinge -> trying to predict 1 or -1
- "sigmoid", //kld -> probab so should be between 0 and 1
- "softmax", //kld + softmax
- "tanh", //l1
- "softmax", //l1 + softmax
- "tanh", //l2
- "softmax", //l2 + softmax
- "identity", //mae
- "softmax", //mae + softmax
- "identity", //mape
- "softmax", //mape + softmax
- "softmax", //mcxent
- "identity", //mse
- "softmax", //mse + softmax
- "sigmoid", //msle - requires positive labels/activations due to log
- "softmax", //msle + softmax
- "sigmoid", //nll
- "softmax", //nll + softmax
- "sigmoid", //poisson - requires positive predictions due to log... not sure if this is the best option
- "tanh", //squared hinge
- "sigmoid", //f-measure (binary, single sigmoid output)
- "softmax" //f-measure (binary, 2-label softmax output)
+ Activation[] outputActivationFn = new Activation[] {Activation.SIGMOID, //xent
+ Activation.SIGMOID, //xent
+ Activation.TANH, //cosine
+ Activation.TANH, //hinge -> trying to predict 1 or -1
+ Activation.SIGMOID, //kld -> probab so should be between 0 and 1
+ Activation.SOFTMAX, //kld + softmax
+ Activation.TANH, //l1
+ Activation.SOFTMAX, //l1 + softmax
+ Activation.TANH, //l2
+ Activation.SOFTMAX, //l2 + softmax
+ Activation.IDENTITY, //mae
+ Activation.SOFTMAX, //mae + softmax
+ Activation.IDENTITY, //mape
+ Activation.SOFTMAX, //mape + softmax
+ Activation.SOFTMAX, //mcxent
+ Activation.IDENTITY, //mse
+ Activation.SOFTMAX, //mse + softmax
+ Activation.SIGMOID, //msle - requires positive labels/activations due to log
+ Activation.SOFTMAX, //msle + softmax
+ Activation.SIGMOID, //nll
+ Activation.SOFTMAX, //nll + softmax
+ Activation.SIGMOID, //poisson - requires positive predictions due to log... not sure if this is the best option
+ Activation.TANH, //squared hinge
+ Activation.SIGMOID, //f-measure (binary, single sigmoid output)
+ Activation.SOFTMAX //f-measure (binary, 2-label softmax output)
};
int[] nOut = new int[] {1, //xent
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java
index fec5f2616551..b5c938f08a29 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java
@@ -24,6 +24,7 @@
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor;
+import org.deeplearning4j.nn.conf.weightnoise.DropConnect;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.api.IterationListener;
@@ -31,6 +32,8 @@
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Adam;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.io.*;
@@ -87,7 +90,7 @@ public void testConvnetJson() {
//setup the network
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations)
- .regularization(true).l1(1e-1).l2(2e-4).useDropConnect(true).dropOut(0.5).miniBatch(true)
+ .l1(1e-1).l2(2e-4).weightNoise(new DropConnect(0.5)).miniBatch(true)
.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list()
.layer(0, new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER)
.activation(Activation.RELU).build())
@@ -121,7 +124,7 @@ public void testUpsamplingConvnetJson() {
//setup the network
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations)
- .regularization(true).l1(1e-1).l2(2e-4).useDropConnect(true).dropOut(0.5).miniBatch(true)
+ .l1(1e-1).l2(2e-4).dropOut(0.5).miniBatch(true)
.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list()
.layer(new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER)
.activation(Activation.RELU).build())
@@ -144,7 +147,7 @@ public void testUpsamplingConvnetJson() {
@Test
public void testGlobalPoolingJson() {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).updater(Updater.NONE)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp())
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(5).build())
.layer(1, new GlobalPoolingLayer.Builder().poolingType(PoolingType.PNORM).pnorm(3).build())
@@ -363,13 +366,12 @@ public void testPreBackFineValidation() {
@Test
public void testBiasLr() {
//setup the network
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).learningRate(1e-2)
- .biasLearningRate(0.5).updater(Updater.ADAM).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new Adam(1e-2))
+ .biasUpdater(new Adam(0.5)).list()
.layer(0, new ConvolutionLayer.Builder(5, 5).nOut(5).weightInit(WeightInit.XAVIER)
.activation(Activation.RELU).build())
.layer(1, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build())
- .layer(2, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).biasLearningRate(0.25)
- .build())
+ .layer(2, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build())
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10)
.weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build())
.setInputType(InputType.convolutional(28, 28, 1)).build();
@@ -379,17 +381,17 @@ public void testBiasLr() {
org.deeplearning4j.nn.conf.layers.BaseLayer l2 = (BaseLayer) conf.getConf(2).getLayer();
org.deeplearning4j.nn.conf.layers.BaseLayer l3 = (BaseLayer) conf.getConf(3).getLayer();
- assertEquals(0.5, l0.getBiasLearningRate(), 1e-6);
- assertEquals(1e-2, l0.getLearningRate(), 1e-6);
+ assertEquals(0.5, ((Adam)l0.getUpdaterByParam("b")).getLearningRate(), 1e-6);
+ assertEquals(1e-2, ((Adam)l0.getUpdaterByParam("W")).getLearningRate(), 1e-6);
- assertEquals(0.5, l1.getBiasLearningRate(), 1e-6);
- assertEquals(1e-2, l1.getLearningRate(), 1e-6);
+ assertEquals(0.5, ((Adam)l1.getUpdaterByParam("b")).getLearningRate(), 1e-6);
+ assertEquals(1e-2, ((Adam)l1.getUpdaterByParam("W")).getLearningRate(), 1e-6);
- assertEquals(0.25, l2.getBiasLearningRate(), 1e-6);
- assertEquals(1e-2, l2.getLearningRate(), 1e-6);
+ assertEquals(0.5, ((Adam)l2.getUpdaterByParam("b")).getLearningRate(), 1e-6);
+ assertEquals(1e-2, ((Adam)l2.getUpdaterByParam("W")).getLearningRate(), 1e-6);
- assertEquals(0.5, l3.getBiasLearningRate(), 1e-6);
- assertEquals(1e-2, l3.getLearningRate(), 1e-6);
+ assertEquals(0.5, ((Adam)l3.getUpdaterByParam("b")).getLearningRate(), 1e-6);
+ assertEquals(1e-2, ((Adam)l3.getUpdaterByParam("W")).getLearningRate(), 1e-6);
}
}
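Note: testBiasLr() now configures learning rates through updater objects rather than the scalar learningRate/biasLearningRate fields. A condensed sketch of the new pattern and how values are read back, mirroring the assertions above (one dense layer stands in for the full network):

    import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.BaseLayer;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.nd4j.linalg.learning.config.Adam;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(new Adam(1e-2))    // replaces learningRate(1e-2).updater(Updater.ADAM)
            .biasUpdater(new Adam(0.5)) // replaces biasLearningRate(0.5)
            .list()
            .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
            .build();

    BaseLayer l0 = (BaseLayer) conf.getConf(0).getLayer();
    double wLr = ((Adam) l0.getUpdaterByParam("W")).getLearningRate(); // 1e-2 (global updater)
    double bLr = ((Adam) l0.getUpdaterByParam("b")).getLearningRate(); // 0.5  (bias updater)

Note also that the per-layer biasLearningRate(0.25) override on layer 2 is dropped by this diff, so the corresponding assertion now expects the global bias value of 0.5.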
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiNeuralNetConfLayerBuilderTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiNeuralNetConfLayerBuilderTest.java
index 479988450f91..1c5bebb96df0 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiNeuralNetConfLayerBuilderTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiNeuralNetConfLayerBuilderTest.java
@@ -29,7 +29,7 @@ public class MultiNeuralNetConfLayerBuilderTest {
int numIn = 10;
int numOut = 5;
double drop = 0.3;
- String act = "softmax";
+ Activation act = Activation.SOFTMAX;
PoolingType poolType = PoolingType.MAX;
int[] filterSize = new int[] {2, 2};
int filterDepth = 6;
@@ -78,7 +78,7 @@ public void testRbmSetup() throws Exception {
MultiLayerConfiguration multiLayerConfiguration =
new NeuralNetConfiguration.Builder().optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
.seed(123).iterations(5).maxNumLineSearchIterations(10) // Magical Optimisation Stuff
- .regularization(true)
+
.list().layer(0,
new RBM.Builder(RBM.HiddenUnit.RECTIFIED,
RBM.VisibleUnit.GAUSSIAN).nIn(784).nOut(1000)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java
index 013899a3bd72..70768512c17c 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java
@@ -35,10 +35,9 @@
import org.nd4j.linalg.api.ops.impl.transforms.LeakyReLU;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
-import java.util.HashMap;
-
import static org.junit.Assert.*;
/**
@@ -92,14 +91,12 @@ public void testYaml() {
public void testClone() {
NeuralNetConfiguration conf = getRBMConfig(1, 1, WeightInit.UNIFORM, true);
BaseLayer bl = (BaseLayer) conf.getLayer();
- bl.setMomentumSchedule(new HashMap());
conf.setStepFunction(new DefaultStepFunction());
NeuralNetConfiguration conf2 = conf.clone();
assertEquals(conf, conf2);
assertNotSame(conf, conf2);
- assertNotSame(bl.getMomentumSchedule(), ((BaseLayer) conf2.getLayer()).getMomentumSchedule());
assertNotSame(conf.getLayer(), conf2.getLayer());
assertNotSame(bl.getDist(), ((BaseLayer) conf2.getLayer()).getDist());
assertNotSame(conf.getStepFunction(), conf2.getStepFunction());
@@ -207,7 +204,7 @@ private static NeuralNetConfiguration getRBMConfig(int nIn, int nOut, WeightInit
.activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build();
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().iterations(3)
- .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).regularization(false).layer(layer)
+ .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).layer(layer)
.build();
conf.setPretrain(pretrain);
return conf;
@@ -232,14 +229,11 @@ public void testLearningRateByParam() {
int iteration = 3;
INDArray gradientW = Nd4j.ones(nIns[0], nOuts[0]);
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.3)).list()
.layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0])
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).learningRate(lr)
- .biasLearningRate(biasLr).build())
- .layer(1, new BatchNormalization.Builder().nIn(nIns[1]).nOut(nOuts[1]).learningRate(0.7)
- .build())
- .layer(2, new OutputLayer.Builder().nIn(nIns[2]).nOut(nOuts[2])
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
+ .updater(new Sgd(lr)).biasUpdater(new Sgd(biasLr)).build())
+ .layer(1, new BatchNormalization.Builder().nIn(nIns[1]).nOut(nOuts[1]).updater(new Sgd(0.7)).build())
+ .layer(2, new OutputLayer.Builder().nIn(nIns[2]).nOut(nOuts[2]).build())
.backprop(true).pretrain(false).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@@ -248,11 +242,11 @@ public void testLearningRateByParam() {
ConvexOptimizer opt = new StochasticGradientDescent(net.getDefaultConfiguration(),
new NegativeDefaultStepFunction(), null, net);
opt.checkTerminalConditions(gradientW, oldScore, newScore, iteration);
- assertEquals(lr, net.getLayer(0).conf().getLearningRateByParam("W"), 1e-4);
- assertEquals(biasLr, net.getLayer(0).conf().getLearningRateByParam("b"), 1e-4);
- assertEquals(0.7, net.getLayer(1).conf().getLearningRateByParam("gamma"), 1e-4);
- assertEquals(0.3, net.getLayer(2).conf().getLearningRateByParam("W"), 1e-4); //From global LR
- assertEquals(0.3, net.getLayer(2).conf().getLearningRateByParam("b"), 1e-4); //From global LR
+ assertEquals(lr, ((Sgd)net.getLayer(0).conf().getLayer().getUpdaterByParam("W")).getLearningRate(), 1e-4);
+ assertEquals(biasLr, ((Sgd)net.getLayer(0).conf().getLayer().getUpdaterByParam("b")).getLearningRate(), 1e-4);
+ assertEquals(0.7, ((Sgd)net.getLayer(1).conf().getLayer().getUpdaterByParam("gamma")).getLearningRate(), 1e-4);
+ assertEquals(0.3, ((Sgd)net.getLayer(2).conf().getLayer().getUpdaterByParam("W")).getLearningRate(), 1e-4); //From global LR
+ assertEquals(0.3, ((Sgd)net.getLayer(2).conf().getLayer().getUpdaterByParam("b")).getLearningRate(), 1e-4); //From global LR
}
@Test
@@ -297,13 +291,11 @@ public void testL1L2ByParam() {
int iteration = 3;
INDArray gradientW = Nd4j.ones(nIns[0], nOuts[0]);
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(8).regularization(true).l1(l1)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l1(l1)
.l2(l2).list()
- .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0])
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
+ .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]).build())
.layer(1, new BatchNormalization.Builder().nIn(nIns[1]).nOut(nOuts[1]).l2(0.5).build())
- .layer(2, new OutputLayer.Builder().nIn(nIns[2]).nOut(nOuts[2])
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
+ .layer(2, new OutputLayer.Builder().nIn(nIns[2]).nOut(nOuts[2]).build())
.backprop(true).pretrain(false).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@@ -328,7 +320,7 @@ public void testLayerPretrainConfig() {
org.deeplearning4j.nn.conf.layers.RBM layer =
new org.deeplearning4j.nn.conf.layers.RBM.Builder(RBM.HiddenUnit.BINARY, RBM.VisibleUnit.BINARY)
- .nIn(10).nOut(5).learningRate(1e-1f)
+ .nIn(10).nOut(5).updater(new Sgd(1e-1))
.lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build();
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1).seed(42).layer(layer).build();
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java
index d5b9faf8ab5e..305674ad22e5 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java
@@ -1,5 +1,6 @@
package org.deeplearning4j.nn.conf.constraints;
+import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
@@ -13,15 +14,12 @@
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
-import org.deeplearning4j.util.ModelSerializer;
import org.junit.Test;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -40,7 +38,7 @@ public void testLayerRecurrentConstraints() throws Exception {
for (LayerConstraint lc : constraints) {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .learningRate(0.0)
+ .updater(new Sgd(0.0))
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 5))
.list()
.layer(new LSTM.Builder().nIn(12).nOut(10)
@@ -75,15 +73,7 @@ public void testLayerRecurrentConstraints() throws Exception {
assertEquals(RW0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
}
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ModelSerializer.writeModel(net, baos, true);
- byte[] bytes = baos.toByteArray();
-
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
-
- assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations());
- assertEquals(net.params(), restored.params());
+ TestUtils.testModelSerialization(net);
}
}
@@ -100,7 +90,7 @@ public void testLayerBiasConstraints() throws Exception {
for (LayerConstraint lc : constraints) {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .learningRate(0.0)
+ .updater(new Sgd(0.0))
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 5))
.biasInit(10.0)
.list()
@@ -136,15 +126,7 @@ public void testLayerBiasConstraints() throws Exception {
assertEquals(b0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
}
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ModelSerializer.writeModel(net, baos, true);
- byte[] bytes = baos.toByteArray();
-
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
-
- assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations());
- assertEquals(net.params(), restored.params());
+ TestUtils.testModelSerialization(net);
}
}
@@ -161,7 +143,7 @@ public void testLayerWeightsConstraints() throws Exception {
for (LayerConstraint lc : constraints) {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .learningRate(0.0)
+ .updater(new Sgd(0.0))
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 5))
.list()
.layer(new DenseLayer.Builder().nIn(12).nOut(10)
@@ -196,15 +178,7 @@ public void testLayerWeightsConstraints() throws Exception {
assertEquals(w0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
}
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ModelSerializer.writeModel(net, baos, true);
- byte[] bytes = baos.toByteArray();
-
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
-
- assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations());
- assertEquals(net.params(), restored.params());
+ TestUtils.testModelSerialization(net);
}
}
@@ -221,7 +195,7 @@ public void testLayerWeightsAndBiasConstraints() throws Exception {
for (LayerConstraint lc : constraints) {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .learningRate(0.0)
+ .updater(new Sgd(0.0))
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 5))
.biasInit(0.2)
.list()
@@ -264,15 +238,7 @@ public void testLayerWeightsAndBiasConstraints() throws Exception {
assertEquals(b0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
}
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ModelSerializer.writeModel(net, baos, true);
- byte[] bytes = baos.toByteArray();
-
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
-
- assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations());
- assertEquals(net.params(), restored.params());
+ TestUtils.testModelSerialization(net);
}
}
@@ -290,7 +256,7 @@ public void testLayerWeightsAndBiasSeparateConstraints() throws Exception {
for (LayerConstraint lc : constraints) {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .learningRate(0.0)
+ .updater(new Sgd(0.0))
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 5))
.biasInit(0.2)
.list()
@@ -333,15 +299,7 @@ public void testLayerWeightsAndBiasSeparateConstraints() throws Exception {
assertEquals(b0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
}
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ModelSerializer.writeModel(net, baos, true);
- byte[] bytes = baos.toByteArray();
-
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
-
- assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations());
- assertEquals(net.params(), restored.params());
+ TestUtils.testModelSerialization(net);
}
}
@@ -359,7 +317,7 @@ public void testModelConstraints() throws Exception {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.constrainWeights(lc)
- .learningRate(0.0)
+ .updater(new Sgd(0.0))
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0,5))
.biasInit(1)
.list()
@@ -400,15 +358,7 @@ public void testModelConstraints() throws Exception {
}
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ModelSerializer.writeModel(net, baos, true);
- byte[] bytes = baos.toByteArray();
-
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
-
- assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations());
- assertEquals(net.params(), restored.params());
+ TestUtils.testModelSerialization(net);
}
}
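Note: each inlined serialize/restore/compare block is replaced by TestUtils.testModelSerialization(net). The helper's body is not part of this diff; a hypothetical reconstruction from the removed inline code (only the helper name comes from the diff, everything else mirrors the deleted lines):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.deeplearning4j.util.ModelSerializer;
    import static org.junit.Assert.assertEquals;

    // Hypothetical sketch -- round-trips the model and checks config + params survive.
    public static void testModelSerialization(MultiLayerNetwork net) {
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            ModelSerializer.writeModel(net, baos, true);
            ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
            MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
            assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations());
            assertEquals(net.params(), restored.params());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }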
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java
new file mode 100644
index 000000000000..444b97d82829
--- /dev/null
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java
@@ -0,0 +1,296 @@
+package org.deeplearning4j.nn.conf.dropout;
+
+import lombok.Data;
+import org.deeplearning4j.TestUtils;
+import org.deeplearning4j.datasets.iterator.ExistingDataSetIterator;
+import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
+import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.graph.LayerVertex;
+import org.deeplearning4j.nn.conf.layers.DenseLayer;
+import org.deeplearning4j.nn.conf.layers.OutputLayer;
+import org.deeplearning4j.nn.graph.ComputationGraph;
+import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
+import org.junit.Test;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.ops.impl.accum.MatchCondition;
+import org.nd4j.linalg.dataset.DataSet;
+import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
+import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.indexing.conditions.Conditions;
+import org.nd4j.linalg.lossfunctions.LossFunctions;
+import org.nd4j.linalg.primitives.Triple;
+import org.nd4j.linalg.schedule.MapSchedule;
+import org.nd4j.linalg.schedule.ScheduleType;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class TestDropout {
+
+ @Test
+ public void testBasicConfig(){
+
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .dropOut(0.6)
+ .list()
+ .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
+ .layer(new DenseLayer.Builder().nIn(10).nOut(10).dropOut(0.7).build())
+ .layer(new DenseLayer.Builder().nIn(10).nOut(10).dropOut(new AlphaDropout(0.5)).build())
+ .build();
+
+ assertEquals(new Dropout(0.6), conf.getConf(0).getLayer().getIDropout());
+ assertEquals(new Dropout(0.7), conf.getConf(1).getLayer().getIDropout());
+ assertEquals(new AlphaDropout(0.5), conf.getConf(2).getLayer().getIDropout());
+
+
+ ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
+ .dropOut(0.6)
+ .graphBuilder()
+ .addInputs("in")
+ .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
+ .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).dropOut(0.7).build(), "0")
+ .addLayer("2", new DenseLayer.Builder().nIn(10).nOut(10).dropOut(new AlphaDropout(0.5)).build(), "1")
+ .setOutputs("2")
+ .build();
+
+ assertEquals(new Dropout(0.6), ((LayerVertex)conf2.getVertices().get("0")).getLayerConf().getLayer().getIDropout());
+ assertEquals(new Dropout(0.7), ((LayerVertex)conf2.getVertices().get("1")).getLayerConf().getLayer().getIDropout());
+ assertEquals(new AlphaDropout(0.5), ((LayerVertex)conf2.getVertices().get("2")).getLayerConf().getLayer().getIDropout());
+ }
+
+ @Test
+ public void testCalls(){
+
+ CustomDropout d1 = new CustomDropout();
+ CustomDropout d2 = new CustomDropout();
+
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .list()
+ .layer(new DenseLayer.Builder().nIn(4).nOut(3).dropOut(d1).build())
+ .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MSE).dropOut(d2).nIn(3).nOut(3).build())
+ .build();
+ MultiLayerNetwork net = new MultiLayerNetwork(conf);
+ net.init();
+
+ List<DataSet> l = new ArrayList<>();
+ l.add(new DataSet(Nd4j.rand(5,4), Nd4j.rand(5,3)));
+ l.add(new DataSet(Nd4j.rand(5,4), Nd4j.rand(5,3)));
+ l.add(new DataSet(Nd4j.rand(5,4), Nd4j.rand(5,3)));
+
+ DataSetIterator iter = new ExistingDataSetIterator(l);
+
+ net.fit(iter);
+ net.fit(iter);
+
+ List<Triple<Integer, Integer, Boolean>> expList = Arrays.asList(
+ new Triple<>(0, 0, false),
+ new Triple<>(1, 0, false),
+ new Triple<>(2, 0, false),
+ new Triple<>(3, 1, false),
+ new Triple<>(4, 1, false),
+ new Triple<>(5, 1, false));
+
+ assertEquals(expList, d1.getAllCalls());
+ assertEquals(expList, d2.getAllCalls());
+
+
+ d1 = new CustomDropout();
+ d2 = new CustomDropout();
+ ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
+ .graphBuilder()
+ .addInputs("in")
+ .addLayer("0", new DenseLayer.Builder().nIn(4).nOut(3).dropOut(d1).build(), "in")
+ .addLayer("1", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).dropOut(d2).nIn(3).nOut(3).build(), "0")
+ .setOutputs("1")
+ .build();
+
+ ComputationGraph net2 = new ComputationGraph(conf2);
+ net2.init();
+
+ net2.fit(iter);
+ net2.fit(iter);
+
+ assertEquals(expList, d1.getAllCalls());
+ assertEquals(expList, d2.getAllCalls());
+ }
+
+ @Data
+ private static class CustomDropout implements IDropout{
+
+ private List<Triple<Integer, Integer, Boolean>> allCalls = new ArrayList<>();
+
+ @Override
+ public INDArray applyDropout(INDArray inputActivations, int iteration, int epoch, boolean inPlace) {
+ allCalls.add(new Triple<>(iteration, epoch, inPlace));
+ return inputActivations;
+ }
+
+ @Override
+ public IDropout clone() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ @Test
+ public void testSerialization(){
+
+ IDropout[] dropouts = new IDropout[]{
+ new Dropout(0.5),
+ new AlphaDropout(0.5),
+ new GaussianDropout(0.1),
+ new GaussianNoise(0.1)};
+
+ for(IDropout id : dropouts) {
+
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .dropOut(id)
+ .list()
+ .layer(new DenseLayer.Builder().nIn(4).nOut(3).build())
+ .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(3).nOut(3).build())
+ .build();
+ MultiLayerNetwork net = new MultiLayerNetwork(conf);
+ net.init();
+
+ TestUtils.testModelSerialization(net);
+
+ ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
+ .dropOut(id)
+ .graphBuilder()
+ .addInputs("in")
+ .addLayer("0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "in")
+ .addLayer("1", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(3).nOut(3).build(), "0")
+ .setOutputs("1")
+ .build();
+
+ ComputationGraph net2 = new ComputationGraph(conf2);
+ net2.init();
+
+ TestUtils.testModelSerialization(net2);
+ }
+ }
+
+ @Test
+ public void testDropoutValues(){
+ Nd4j.getRandom().setSeed(12345);
+
+ Dropout d = new Dropout(0.5);
+
+ INDArray in = Nd4j.ones(10, 10);
+ INDArray out = d.applyDropout(in, 0, 0, false);
+
+ assertEquals(in, Nd4j.ones(10, 10));
+
+ int countZeros = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(0))).z().getInt(0);
+ int countTwos = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(2))).z().getInt(0);
+
+ assertEquals(100, countZeros + countTwos); //Should only be 0 or 2
+ //Stochastic, but this should hold for most cases
+ assertTrue(countZeros >= 25 && countZeros <= 75);
+ assertTrue(countTwos >= 25 && countTwos <= 75);
+
+ //Test schedule:
+ d = new Dropout(new MapSchedule.Builder(ScheduleType.ITERATION).add(0, 0.5).add(5, 0.1).build());
+ for( int i=0; i<10; i++ ) {
+ out = d.applyDropout(in, i, 0, false);
+ assertEquals(in, Nd4j.ones(10, 10));
+ countZeros = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(0))).z().getInt(0);
+
+ if(i < 5){
+ countTwos = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(2))).z().getInt(0);
+ assertEquals(String.valueOf(i), 100, countZeros + countTwos); //Should only be 0 or 2
+ //Stochastic, but this should hold for most cases
+ assertTrue(countZeros >= 25 && countZeros <= 75);
+ assertTrue(countTwos >= 25 && countTwos <= 75);
+ } else {
+ int countInverse = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(1.0/0.1))).z().getInt(0);
+ assertEquals(100, countZeros + countInverse); //Should only be 0 or 10
+ //Stochastic, but this should hold for most cases
+ assertTrue(countZeros >= 80);
+ assertTrue(countInverse <= 20);
+ }
+ }
+ }
+
+ @Test
+ public void testGaussianDropoutValues(){
+ Nd4j.getRandom().setSeed(12345);
+
+ GaussianDropout d = new GaussianDropout(0.1); //sqrt(0.1/(1-0.1)) = 0.3333 stdev
+
+ INDArray in = Nd4j.ones(50, 50);
+ INDArray out = d.applyDropout(in, 0, 0, false);
+
+ assertEquals(in, Nd4j.ones(50, 50));
+
+ double mean = out.meanNumber().doubleValue();
+ double stdev = out.stdNumber().doubleValue();
+
+ assertEquals(1.0, mean, 0.05);
+ assertEquals(0.333, stdev, 0.02);
+ }
+
+ @Test
+ public void testGaussianNoiseValues(){
+ Nd4j.getRandom().setSeed(12345);
+
+ GaussianNoise d = new GaussianNoise(0.1); //sqrt(0.1/(1-0.1)) = 0.3333 stdev
+
+ INDArray in = Nd4j.ones(50, 50);
+ INDArray out = d.applyDropout(in, 0, 0, false);
+
+ assertEquals(in, Nd4j.ones(50, 50));
+
+ double mean = out.meanNumber().doubleValue();
+ double stdev = out.stdNumber().doubleValue();
+
+ assertEquals(1.0, mean, 0.05);
+ assertEquals(0.1, stdev, 0.01);
+ }
+
+ @Test
+ public void testAlphaDropoutValues(){
+ Nd4j.getRandom().setSeed(12345);
+
+ double p = 0.4;
+ AlphaDropout d = new AlphaDropout(p);
+
+ double SELU_ALPHA = 1.6732632423543772;
+ double SELU_LAMBDA = 1.0507009873554804;
+ double alphaPrime = - SELU_LAMBDA * SELU_ALPHA;
+ double a = 1.0 / Math.sqrt((p + alphaPrime * alphaPrime * p * (1-p)));
+ double b = -1.0 / Math.sqrt(p + alphaPrime * alphaPrime * p * (1-p)) * (1-p) * alphaPrime;
+
+ double actA = d.a(p);
+ double actB = d.b(p);
+
+ assertEquals(a, actA, 1e-6);
+ assertEquals(b, actB, 1e-6);
+
+ INDArray in = Nd4j.ones(10, 10);
+ INDArray out = d.applyDropout(in, 0, 0, false);
+
+ int countValueDropped = 0;
+ int countEqn = 0;
+ double eqn = a * 1 + b;
+ double valueDropped = a * alphaPrime + b;
+ for(int i=0; i<100; i++ ){
+ double v = out.getDouble(i);
+ if(v >= valueDropped - 1e-6 && v <= valueDropped + 1e-6){
+ countValueDropped++;
+ } else if(v >= eqn - 1e-6 && v <= eqn + 1e-6){
+ countEqn++;
+ }
+
+ }
+
+ assertEquals(100, countValueDropped + countEqn);
+ assertTrue(countValueDropped >= 25 && countValueDropped <= 75);
+ assertTrue(countEqn >= 25 && countEqn <= 75);
+ }
+
+}
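Note: the numeric bounds in the new TestDropout file follow directly from the noise definitions. A short sketch of the arithmetic, assuming the inverted-dropout and multiplicative-Gaussian formulations that the in-file comments indicate:

    // Inverted dropout, rate p = 0.5: each unit survives with probability p and is
    // scaled by 1/p, so an all-ones input yields only 0s and 2s -- hence the
    // countZeros/countTwos checks -- and E[out] = p * (1/p) = 1.
    double p = 0.5;
    double keptValue = 1.0 / p; // 2.0

    // GaussianDropout(rate): multiplicative noise ~ N(1, rate/(1-rate)); for
    // rate = 0.1, stdev = sqrt(0.1/0.9) ~= 0.333, the value asserted above.
    double rate = 0.1;
    double stdev = Math.sqrt(rate / (1.0 - rate));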
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java
index 7912e4a1622c..50c391156049 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java
@@ -1,10 +1,8 @@
package org.deeplearning4j.nn.conf.graph;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.ActivationLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
@@ -19,8 +17,10 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Map;
@@ -175,7 +175,7 @@ public void testElementWiseVertexFullAdd() {
int midsz = 13;
int outputsz = 11;
ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER)
- .biasInit(0.0).updater(Updater.SGD)
+ .biasInit(0.0).updater(new Sgd())
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder()
.addInputs("input1", "input2", "input3")
.addLayer("dense1",
@@ -240,7 +240,7 @@ public void testElementWiseVertexFullAdd() {
expect.addi(Transforms.sigmoid(middle.mmul(output_W).addi(output_b.repmat(batchsz, 1))));
- INDArray output = nullsafe(cg.getOutputLayer(0).activate());
+ INDArray output = nullsafe(cg.output(input1, input2, input3)[0]);
Assert.assertEquals(0.0, mse(output, expect), this.epsilon);
@@ -350,7 +350,7 @@ public void testElementWiseVertexFullProduct() {
int midsz = 13;
int outputsz = 11;
ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER)
- .biasInit(0.0).updater(Updater.SGD)
+ .biasInit(0.0).updater(new Sgd())
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder()
.addInputs("input1", "input2", "input3")
.addLayer("dense1",
@@ -415,7 +415,7 @@ public void testElementWiseVertexFullProduct() {
expect.addi(Transforms.sigmoid(middle.mmul(output_W).addi(output_b.repmat(batchsz, 1))));
- INDArray output = nullsafe(cg.getOutputLayer(0).activate());
+ INDArray output = nullsafe(cg.output(input1, input2, input3)[0]);
Assert.assertEquals(0.0, mse(output, expect), this.epsilon);
@@ -524,7 +524,7 @@ public void testElementWiseVertexFullSubtract() {
int midsz = 13;
int outputsz = 11;
ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER)
- .biasInit(0.0).updater(Updater.SGD)
+ .biasInit(0.0).updater(new Sgd())
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder()
.addInputs("input1", "input2")
.addLayer("dense1",
@@ -579,7 +579,7 @@ public void testElementWiseVertexFullSubtract() {
expect.addi(Transforms.sigmoid(middle.mmul(output_W).addi(output_b.repmat(batchsz, 1))));
- INDArray output = nullsafe(cg.getOutputLayer(0).activate());
+ INDArray output = nullsafe(cg.output(input1, input2)[0]);
Assert.assertEquals(0.0, mse(output, expect), this.epsilon);
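Note: calling activate() on the output layer alone reuses whatever input that layer last had set, whereas ComputationGraph.output(...) runs a complete forward pass on the supplied arrays, which is what these equality checks require. The new call shape, as used above:

    // One INDArray per configured graph output; index 0 is the single output here.
    INDArray[] outputs = cg.output(input1, input2, input3);
    INDArray prediction = outputs[0];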
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java
index 57cb536d4f3e..27cef6c1ee15 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java
@@ -1,10 +1,8 @@
package org.deeplearning4j.nn.conf.graph;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.ActivationLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
@@ -19,7 +17,9 @@
import org.nd4j.linalg.activations.impl.ActivationTanH;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Map;
import java.util.TreeMap;
@@ -121,7 +121,7 @@ public void testComprehensive() {
{0.55, 0.60, 0.65, 0.70, 0.75}, {0.80, 0.85, 0.90, 0.95, 0.99}});
ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER)
- .learningRate(0.01).updater(Updater.SGD)
+ .updater(new Sgd(0.01))
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder()
.addInputs("input")
.addLayer("denselayer",
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java
index 61d8944450c6..bc3b272cb8cf 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java
@@ -2,9 +2,9 @@
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.Distribution;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.RBM.HiddenUnit;
import org.deeplearning4j.nn.conf.layers.RBM.VisibleUnit;
import org.deeplearning4j.nn.weights.WeightInit;
@@ -14,6 +14,8 @@
import org.nd4j.linalg.activations.impl.ActivationSoftmax;
import org.nd4j.linalg.activations.impl.ActivationTanH;
import org.nd4j.linalg.convolution.Convolution;
+import org.nd4j.linalg.learning.config.AdaGrad;
+import org.nd4j.linalg.learning.config.IUpdater;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
import java.io.*;
@@ -45,7 +47,7 @@ public class LayerBuilderTest {
double corruptionLevel = 0.5;
Distribution dist = new NormalDistribution(1.0, 0.1);
double dropOut = 0.1;
- Updater updater = Updater.ADAGRAD;
+ IUpdater updater = new AdaGrad();
GradientNormalization gradNorm = GradientNormalization.ClipL2PerParamType;
double gradNormThreshold = 8;
@@ -60,8 +62,8 @@ public void testLayer() throws Exception {
assertEquals(act, layer.getActivationFn());
assertEquals(weight, layer.getWeightInit());
assertEquals(dist, layer.getDist());
- assertEquals(dropOut, layer.getDropOut(), DELTA);
- assertEquals(updater, layer.getUpdater());
+ assertEquals(new Dropout(dropOut), layer.getIDropout());
+ assertEquals(updater, layer.getIUpdater());
assertEquals(gradNorm, layer.getGradientNormalization());
assertEquals(gradNormThreshold, layer.getGradientNormalizationThreshold(), 0.0);
}
@@ -226,7 +228,7 @@ private void checkSerialization(Layer layer) throws Exception {
assertEquals("unequal YAML serialization", confExpected.getLayer(), confActual.getLayer());
// check the layer's use of callSuper on equals method
- confActual.getLayer().setDropOut(new java.util.Random().nextDouble());
+ confActual.getLayer().setIDropout(new Dropout(new java.util.Random().nextDouble()));
assertNotEquals("broken equals method (missing callSuper?)", confExpected.getLayer(), confActual.getLayer());
}
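Note: this test also shows the general enum-to-object updater migration applied across the diff; hyperparameters that used to be separate builder fields move into the updater constructors. A rough correspondence, with each right-hand form taken from a hunk in this patch:

    // Updater.SGD                      -> new Sgd(lr)
    // Updater.ADAGRAD                  -> new AdaGrad()
    // Updater.ADAM + adamMean/VarDecay -> new Adam(lr, beta1, beta2, epsilon)
    // Updater.NESTEROVS + momentum(m)  -> new Nesterovs(lr, m)
    // Updater.ADADELTA + rho(r)        -> new AdaDelta(rho, epsilon) or AdaDelta.Builder
    // Updater.RMSPROP + rmsDecay(d)    -> new RmsProp(lr, d, RmsProp.DEFAULT_RMSPROP_EPSILON)
    IUpdater updater = new AdaGrad(); // asserted via layer.getIUpdater() above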
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java
index 58faa4dec172..d20544571175 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java
@@ -1,8 +1,11 @@
package org.deeplearning4j.nn.conf.layers;
-import org.deeplearning4j.nn.conf.*;
+import org.deeplearning4j.nn.conf.GradientNormalization;
+import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.junit.Test;
@@ -11,11 +14,14 @@
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.learning.config.RmsProp;
+import org.nd4j.linalg.schedule.MapSchedule;
+import org.nd4j.linalg.schedule.ScheduleType;
import java.util.HashMap;
import java.util.Map;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
public class LayerConfigTest {
@@ -101,6 +107,7 @@ public void testWeightBiasInitLayerwiseOverride() {
assertEquals(0, ((BaseLayer) conf.getConf(1).getLayer()).getBiasInit(), 0.0);
}
+ /*
@Test
public void testLrL1L2LayerwiseOverride() {
//Idea: Set some common values for all layers. Then selectively override
@@ -128,7 +135,7 @@ public void testLrL1L2LayerwiseOverride() {
assertEquals(0.2, ((BaseLayer) conf.getConf(1).getLayer()).getLearningRate(), 0.0);
//L1 and L2 without layerwise override:
- conf = new NeuralNetConfiguration.Builder().regularization(true).l1(0.1).l2(0.2).list()
+ conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.2).list()
.layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
.layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
net = new MultiLayerNetwork(conf);
@@ -140,7 +147,7 @@ public void testLrL1L2LayerwiseOverride() {
assertEquals(0.2, ((BaseLayer) conf.getConf(1).getLayer()).getL2(), 0.0);
//L1 and L2 with layerwise override:
- conf = new NeuralNetConfiguration.Builder().regularization(true).l1(0.1).l2(0.2).list()
+ conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.2).list()
.layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l1(0.9).build())
.layer(1, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.8).build()).build();
net = new MultiLayerNetwork(conf);
@@ -150,7 +157,7 @@ public void testLrL1L2LayerwiseOverride() {
assertEquals(0.1, ((BaseLayer) conf.getConf(1).getLayer()).getL1(), 0.0);
assertEquals(0.2, ((BaseLayer) conf.getConf(0).getLayer()).getL2(), 0.0);
assertEquals(0.8, ((BaseLayer) conf.getConf(1).getLayer()).getL2(), 0.0);
- }
+ }*/
@@ -162,8 +169,8 @@ public void testDropoutLayerwiseOverride() {
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
- assertEquals(1.0, conf.getConf(0).getLayer().getDropOut(), 0.0);
- assertEquals(1.0, conf.getConf(1).getLayer().getDropOut(), 0.0);
+ assertEquals(new Dropout(1.0), conf.getConf(0).getLayer().getIDropout());
+ assertEquals(new Dropout(1.0), conf.getConf(1).getLayer().getIDropout());
conf = new NeuralNetConfiguration.Builder().dropOut(1.0).list()
.layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
@@ -172,8 +179,8 @@ public void testDropoutLayerwiseOverride() {
net = new MultiLayerNetwork(conf);
net.init();
- assertEquals(1.0, conf.getConf(0).getLayer().getDropOut(), 0.0);
- assertEquals(2.0, conf.getConf(1).getLayer().getDropOut(), 0.0);
+ assertEquals(new Dropout(1.0), conf.getConf(0).getLayer().getIDropout());
+ assertEquals(new Dropout(2.0), conf.getConf(1).getLayer().getIDropout());
}
@Test
@@ -181,68 +188,55 @@ public void testMomentumLayerwiseOverride() {
Map<Integer, Double> testMomentumAfter = new HashMap<>();
testMomentumAfter.put(0, 0.1);
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NESTEROVS).momentum(1.0)
- .momentumAfter(testMomentumAfter).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter)))
+ .list()
.layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
.layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
- assertEquals(1.0, ((BaseLayer) conf.getConf(0).getLayer()).getMomentum(), 0.0);
- assertEquals(1.0, ((BaseLayer) conf.getConf(1).getLayer()).getMomentum(), 0.0);
- assertEquals(0.1, ((BaseLayer) conf.getConf(0).getLayer()).getMomentumSchedule().get(0), 0.0);
- assertEquals(0.1, ((BaseLayer) conf.getConf(1).getLayer()).getMomentumSchedule().get(0), 0.0);
+ assertEquals(0.1, ((Nesterovs)((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0);
+ assertEquals(0.1, ((Nesterovs)((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0);
Map testMomentumAfter2 = new HashMap<>();
testMomentumAfter2.put(0, 0.2);
- conf = new NeuralNetConfiguration.Builder().updater(Updater.NESTEROVS).momentum(1.0)
- .momentumAfter(testMomentumAfter).list()
+ conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter) ))
+ .list()
.layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()).layer(1, new DenseLayer.Builder()
- .nIn(2).nOut(2).momentum(2.0).momentumAfter(testMomentumAfter2).build())
+ .nIn(2).nOut(2).updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter2))).build())
.build();
net = new MultiLayerNetwork(conf);
net.init();
-
- assertEquals(1.0, ((BaseLayer) conf.getConf(0).getLayer()).getMomentum(), 0.0);
- assertEquals(2.0, ((BaseLayer) conf.getConf(1).getLayer()).getMomentum(), 0.0);
- assertEquals(1.0, ((Nesterovs) ((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getMomentum(), 0.0);
- assertEquals(2.0, ((Nesterovs) ((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getMomentum(), 0.0);
- assertEquals(0.1, ((BaseLayer) conf.getConf(0).getLayer()).getMomentumSchedule().get(0), 0.0);
- assertEquals(0.2, ((BaseLayer) conf.getConf(1).getLayer()).getMomentumSchedule().get(0), 0.0);
- assertEquals(0.1, ((Nesterovs) ((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getMomentumSchedule()
- .get(0), 0.0);
- assertEquals(0.2, ((Nesterovs) ((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getMomentumSchedule()
- .get(0), 0.0);
-
+ assertEquals(0.1, ((Nesterovs)((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0);
+ assertEquals(0.2, ((Nesterovs)((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0);
}
@Test
public void testUpdaterRhoRmsDecayLayerwiseOverride() {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.ADADELTA).rho(0.5).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new AdaDelta(0.5, 0.9)).list()
.layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).rho(0.01).build()).build();
+ .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new AdaDelta(0.01,0.9)).build()).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
- assertEquals("ADADELTA", ((BaseLayer) conf.getConf(0).getLayer()).getUpdater().toString());
- assertEquals("ADADELTA", ((BaseLayer) conf.getConf(1).getLayer()).getUpdater().toString());
- assertEquals(0.5, ((BaseLayer) conf.getConf(0).getLayer()).getRho(), 0.0);
- assertEquals(0.01, ((BaseLayer) conf.getConf(1).getLayer()).getRho(), 0.0);
+ assertTrue(((BaseLayer) conf.getConf(0).getLayer()).getIUpdater() instanceof AdaDelta);
+ assertTrue(((BaseLayer) conf.getConf(1).getLayer()).getIUpdater() instanceof AdaDelta);
+ assertEquals(0.5, ((AdaDelta)((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getRho(), 0.0);
+ assertEquals(0.01, ((AdaDelta)((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getRho(), 0.0);
- conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP).rmsDecay(2.0).list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).rmsDecay(1.0).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(Updater.ADADELTA).rho(0.5).build())
+ conf = new NeuralNetConfiguration.Builder().updater(new RmsProp(1.0, 2.0, RmsProp.DEFAULT_RMSPROP_EPSILON)).list()
+ .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).updater(new RmsProp(1.0, 1.0, RmsProp.DEFAULT_RMSPROP_EPSILON)).build())
+ .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new AdaDelta.Builder().rho(0.5).build()).build())
.build();
net = new MultiLayerNetwork(conf);
net.init();
- assertEquals("RMSPROP", ((BaseLayer) conf.getConf(0).getLayer()).getUpdater().toString());
- assertEquals("ADADELTA", ((BaseLayer) conf.getConf(1).getLayer()).getUpdater().toString());
- assertEquals(1.0, ((BaseLayer) conf.getConf(0).getLayer()).getRmsDecay(), 0.0);
- assertEquals(0.5, ((BaseLayer) conf.getConf(1).getLayer()).getRho(), 0.0);
+ assertTrue(((BaseLayer) conf.getConf(0).getLayer()).getIUpdater() instanceof RmsProp);
+ assertTrue(((BaseLayer) conf.getConf(1).getLayer()).getIUpdater() instanceof AdaDelta);
assertEquals(1.0, ((RmsProp) ((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getRmsDecay(), 0.0);
assertEquals(0.5, ((AdaDelta) ((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getRho(), 0.0);
}
@@ -250,20 +244,15 @@ public void testUpdaterRhoRmsDecayLayerwiseOverride() {
@Test
public void testUpdaterAdamParamsLayerwiseOverride() {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.ADAM).adamMeanDecay(0.5)
- .adamVarDecay(0.5).list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).adamMeanDecay(0.6).adamVarDecay(0.7).build())
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .updater(new Adam(1.0, 0.5, 0.5, 1e-8))
+ .list()
+ .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
+ .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new Adam(1.0, 0.6, 0.7, 1e-8)).build())
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
- assertEquals("ADAM", ((BaseLayer) conf.getConf(0).getLayer()).getUpdater().toString());
- assertEquals("ADAM", ((BaseLayer) conf.getConf(1).getLayer()).getUpdater().toString(), "ADAM");
- assertEquals(0.5, ((BaseLayer) conf.getConf(0).getLayer()).getAdamMeanDecay(), 0.0);
- assertEquals(0.6, ((BaseLayer) conf.getConf(1).getLayer()).getAdamMeanDecay(), 0.0);
- assertEquals(0.5, ((BaseLayer) conf.getConf(0).getLayer()).getAdamVarDecay(), 0.0);
- assertEquals(0.7, ((BaseLayer) conf.getConf(1).getLayer()).getAdamVarDecay(), 0.0);
-
assertEquals(0.5, ((Adam) ((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getBeta1(), 0.0);
assertEquals(0.6, ((Adam) ((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getBeta1(), 0.0);
assertEquals(0.5, ((Adam) ((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getBeta2(), 0.0);
@@ -309,28 +298,15 @@ public void testGradientNormalizationLayerwiseOverride() {
assertEquals(2.5, ((BaseLayer) conf.getConf(1).getLayer()).getGradientNormalizationThreshold(), 0.0);
}
- @Test
- public void testLearningRatePolicyNone() {
- double lr = 2;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
- .learningRateDecayPolicy(LearningRatePolicy.None).list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
-
- assertEquals(LearningRatePolicy.None, conf.getConf(0).getLearningRatePolicy());
- assertEquals(LearningRatePolicy.None, conf.getConf(1).getLearningRatePolicy());
-
- }
-
+ /*
@Test
public void testLearningRatePolicyExponential() {
double lr = 2;
double lrDecayRate = 5;
int iterations = 1;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
+ .updater(Updater.SGD)
.learningRateDecayPolicy(LearningRatePolicy.Exponential).lrPolicyDecayRate(lrDecayRate).list()
.layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
.layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
@@ -428,5 +404,5 @@ public void testLearningRatePolicySigmoid() {
assertEquals(steps, conf.getConf(1).getLrPolicySteps(), 0.0);
}
-
+*/
}
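Note: momentumAfter maps become ISchedule instances attached to the updater. Both construction styles used in this patch, condensed (all calls appear verbatim in the hunks above):

    import java.util.HashMap;
    import java.util.Map;
    import org.nd4j.linalg.learning.config.Nesterovs;
    import org.nd4j.linalg.schedule.MapSchedule;
    import org.nd4j.linalg.schedule.ScheduleType;

    Map<Integer, Double> momentumAfter = new HashMap<>();
    momentumAfter.put(0, 0.1); // iteration -> momentum value
    Nesterovs n = new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, momentumAfter));
    double m0 = n.getMomentumISchedule().valueAt(0, 0); // 0.1

    // Builder form, as used for the dropout schedule in TestDropout:
    MapSchedule sched = new MapSchedule.Builder(ScheduleType.ITERATION)
            .add(0, 0.5).add(5, 0.1).build();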
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java
index e813512d023f..7ce1787c6e06 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java
@@ -1,16 +1,26 @@
package org.deeplearning4j.nn.conf.layers;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
-import org.deeplearning4j.nn.conf.*;
+import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
+import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.Distribution;
import org.deeplearning4j.nn.conf.distribution.GaussianDistribution;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
+import org.deeplearning4j.nn.conf.weightnoise.DropConnect;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.junit.Ignore;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
+import org.nd4j.linalg.learning.config.Adam;
+import org.nd4j.linalg.learning.config.Nesterovs;
+import org.nd4j.linalg.learning.config.RmsProp;
+import org.nd4j.linalg.learning.config.Sgd;
+import org.nd4j.linalg.schedule.MapSchedule;
+import org.nd4j.linalg.schedule.ScheduleType;
import java.util.HashMap;
import java.util.Map;
@@ -23,7 +33,7 @@ public class LayerConfigValidationTest {
@Test
public void testDropConnect() {
// Warning thrown only since some layers may not have l1 or l2
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).useDropConnect(true)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)).weightNoise(new DropConnect(0.5))
.list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
.layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@@ -34,7 +44,7 @@ public void testDropConnect() {
@Test
public void testL1L2NotSet() {
// Warning thrown only since some layers may not have l1 or l2
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).regularization(true)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.3))
.list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
.layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@@ -44,7 +54,7 @@ public void testL1L2NotSet() {
@Test(expected = IllegalStateException.class)
@Ignore //Old assumption: throw exception on l1 but no regularization. Current design: warn, not exception
public void testRegNotSetL1Global() {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).l1(0.5).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.3)).l1(0.5).list()
.layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
.layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@@ -54,7 +64,7 @@ public void testRegNotSetL1Global() {
@Test(expected = IllegalStateException.class)
@Ignore //Old assumption: throw exception on l1 but no regularization. Current design: warn, not exception
public void testRegNotSetL2Local() {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.3)).list()
.layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).build())
.layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@@ -65,7 +75,7 @@ public void testRegNotSetL2Local() {
public void testWeightInitDistNotSet() {
// Warning thrown only since global dist can be set with a different weight init locally
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(0.3).dist(new GaussianDistribution(1e-3, 2))
+ new NeuralNetConfiguration.Builder().updater(new Sgd(0.3)).dist(new GaussianDistribution(1e-3, 2))
.list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
.layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@@ -79,134 +89,18 @@ public void testNesterovsNotSetGlobal() {
testMomentumAfter.put(0, 0.1);
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().momentum(1.0).momentumAfter(testMomentumAfter).list()
+ new NeuralNetConfiguration.Builder().updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter))).list()
.layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
.layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
}
- @Test
- public void testNesterovsNotSetLocalMomentum() {
- // Warnings only thrown
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).momentum(0.3).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
- }
-
- @Test
- public void testNesterovsNotSetLocalMuAfter() {
- // Warnings only thrown
- Map<Integer, Double> testMomentumAfter = new HashMap<>();
- testMomentumAfter.put(0, 0.1);
-
- MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2)
- .momentumAfter(testMomentumAfter).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
- }
-
-
- @Test
- public void testAdaDeltaValidation() {
- // Warnings only thrown
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().rho(0.5).list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).rho(0.01).build()).build();
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
-
- }
-
- @Test
- public void testRmsPropValidation() {
- // Warnings only thrown
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().rmsDecay(2.0).list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).rmsDecay(1.0).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(Updater.ADADELTA).rho(0.5).build())
- .build();
-
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
- }
-
-
- @Test
- public void testAdamValidation() {
- // Warnings only thrown
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().adamMeanDecay(0.5).adamVarDecay(0.5).list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).adamMeanDecay(0.6).adamVarDecay(0.7).build())
- .build();
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
- }
-
-
- @Test(expected = IllegalStateException.class)
- public void testLRPolicyMissingDecayRate() {
- double lr = 2;
- double power = 3;
- int iterations = 1;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr)
- .learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyPower(power).list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
- }
-
- @Test(expected = IllegalStateException.class)
- public void testLRPolicyMissingPower() {
- double lr = 2;
- double lrDecayRate = 5;
- int iterations = 1;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr)
- .learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(lrDecayRate).list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
-
- }
-
- @Test(expected = IllegalStateException.class)
- public void testLRPolicyMissingSteps() {
- double lr = 2;
- double lrDecayRate = 5;
- int iterations = 1;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr)
- .learningRateDecayPolicy(LearningRatePolicy.Step).lrPolicyDecayRate(lrDecayRate).list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
-
- }
-
- @Test(expected = IllegalStateException.class)
- public void testLRPolicyMissingSchedule() {
- double lr = 2;
- double lrDecayRate = 5;
- int iterations = 1;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr)
- .learningRateDecayPolicy(LearningRatePolicy.Schedule).lrPolicyDecayRate(lrDecayRate).list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
- }
-
@Test
public void testCompGraphNullLayer() {
ComputationGraphConfiguration.GraphBuilder gb = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.01)
- .iterations(3).seed(42).miniBatch(false).l1(0.2).l2(0.2).rmsDecay(0.3).regularization(true)
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.01))
+ .iterations(3).seed(42).miniBatch(false).l1(0.2).l2(0.2)
/* Graph Builder */
.updater(Updater.RMSPROP).graphBuilder().addInputs("in")
.addLayer("L" + 1,
@@ -236,22 +130,22 @@ public void testPredefinedConfigValues() {
double expectedL2 = 0.0;
// Nesterovs Updater
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).updater(Updater.NESTEROVS)
- .regularization(true).list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).momentum(0.4).build()).build();
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(0.9))
+ .list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).build())
+ .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new Nesterovs(0.3, 0.4)).build()).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
BaseLayer layerConf = (BaseLayer) net.getLayer(0).conf().getLayer();
- assertEquals(expectedMomentum, layerConf.getMomentum(), 1e-3);
+ assertEquals(expectedMomentum, ((Nesterovs)layerConf.getIUpdater()).getMomentum(), 1e-3);
assertEquals(expectedL1, layerConf.getL1(), 1e-3);
assertEquals(0.5, layerConf.getL2(), 1e-3);
BaseLayer layerConf1 = (BaseLayer) net.getLayer(1).conf().getLayer();
- assertEquals(0.4, layerConf1.getMomentum(), 1e-3);
+ assertEquals(0.4, ((Nesterovs)layerConf1.getIUpdater()).getMomentum(), 1e-3);
// Adam Updater
- conf = new NeuralNetConfiguration.Builder().learningRate(0.3).updater(Updater.ADAM).regularization(true)
+ conf = new NeuralNetConfiguration.Builder().updater(new Adam(0.3))
.weightInit(WeightInit.DISTRIBUTION).list()
.layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).l1(0.3).build())
.layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
@@ -263,27 +157,27 @@ public void testPredefinedConfigValues() {
assertEquals(0.5, layerConf.getL2(), 1e-3);
layerConf1 = (BaseLayer) net.getLayer(1).conf().getLayer();
- assertEquals(expectedAdamMeanDecay, layerConf1.getAdamMeanDecay(), 1e-3);
- assertEquals(expectedAdamVarDecay, layerConf1.getAdamVarDecay(), 1e-3);
+ assertEquals(expectedAdamMeanDecay, ((Adam)layerConf1.getIUpdater()).getBeta1(), 1e-3);
+ assertEquals(expectedAdamVarDecay, ((Adam)layerConf1.getIUpdater()).getBeta2(), 1e-3);
assertEquals(expectedDist, layerConf1.getDist());
// l1 & l2 local should still be set whether regularization true or false
assertEquals(expectedL1, layerConf1.getL1(), 1e-3);
assertEquals(expectedL2, layerConf1.getL2(), 1e-3);
//RMSProp Updater
- conf = new NeuralNetConfiguration.Builder().learningRate(0.3).updater(Updater.RMSPROP).list()
+ conf = new NeuralNetConfiguration.Builder().updater(new RmsProp(0.3)).list()
.layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).rmsDecay(0.4).build()).build();
+ .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new RmsProp(0.3, 0.4, RmsProp.DEFAULT_RMSPROP_EPSILON)).build()).build();
net = new MultiLayerNetwork(conf);
net.init();
layerConf = (BaseLayer) net.getLayer(0).conf().getLayer();
- assertEquals(expectedRmsDecay, layerConf.getRmsDecay(), 1e-3);
+ assertEquals(expectedRmsDecay, ((RmsProp)layerConf.getIUpdater()).getRmsDecay(), 1e-3);
assertEquals(expectedL1, layerConf.getL1(), 1e-3);
assertEquals(expectedL2, layerConf.getL2(), 1e-3);
layerConf1 = (BaseLayer) net.getLayer(1).conf().getLayer();
- assertEquals(0.4, layerConf1.getRmsDecay(), 1e-3);
+ assertEquals(0.4, ((RmsProp)layerConf1.getIUpdater()).getRmsDecay(), 1e-3);
}
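
The hunks above all follow one migration pattern: hyperparameters that used to be free-floating builder methods (learningRate, momentum, momentumAfter, rmsDecay, adamMeanDecay/adamVarDecay) now travel on a single IUpdater instance from org.nd4j.linalg.learning.config, and schedule objects replace the old iteration-keyed maps. A minimal sketch of the pattern, using only constructors that already appear in this diff; the wrapper class and variable names are illustrative, not part of the change:

    import java.util.HashMap;
    import java.util.Map;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.nd4j.linalg.learning.config.Nesterovs;
    import org.nd4j.linalg.schedule.MapSchedule;
    import org.nd4j.linalg.schedule.ScheduleType;

    public class UpdaterMigrationSketch {
        public static void main(String[] args) {
            // Old: .learningRate(0.3).updater(Updater.NESTEROVS).momentum(0.9)
            // New: one updater object carries both the learning rate and the momentum
            NeuralNetConfiguration.Builder fixedRate = new NeuralNetConfiguration.Builder()
                    .updater(new Nesterovs(0.3, 0.9));

            // Old: .momentum(1.0).momentumAfter(map) with an iteration -> momentum map
            // New: the same map, wrapped in a MapSchedule and handed to the updater
            Map<Integer, Double> momentumByIteration = new HashMap<>();
            momentumByIteration.put(0, 0.1);
            NeuralNetConfiguration.Builder scheduled = new NeuralNetConfiguration.Builder()
                    .updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, momentumByIteration)));

            // Old: .updater(Updater.NONE) for fixed-weight tests
            // New: new NoOp() (see the TestVariableLengthTSCG hunks later in this diff)
        }
    }
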
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java
index 95bbddaab72e..56295ae29640 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java
@@ -44,7 +44,7 @@ public void testCustomPreprocessor() {
//Second: let's create a MultiLayerConfiguration with one, and check that JSON and YAML config actually works...
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(0.1).list()
+ new NeuralNetConfiguration.Builder().list()
.layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
.layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10)
.nOut(10).build())
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java
index b0b7d0f08640..d2575c02acf6 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java
@@ -1,10 +1,8 @@
package org.deeplearning4j.nn.conf.preprocessor;
-import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.FeedForwardLayer;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
@@ -432,27 +430,22 @@ public void testAutoAdditionOfPreprocessors() {
@Test
public void testCnnToDense() {
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder()
- //.gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .learningRate(0.01) // default
- //.momentum(0.9)
- .regularization(true)
- .list().layer(0,
- new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder(
- 4, 4) // 28*28*1 => 15*15*10
- .nIn(1).nOut(10).padding(2, 2)
- .stride(2, 2)
- .weightInit(WeightInit.RELU)
- .activation(Activation.RELU)
- .build())
- .layer(1, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder()
- .activation(Activation.RELU).nOut(200).build())
- .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(200)
- .nOut(5).weightInit(WeightInit.RELU)
- .activation(Activation.SOFTMAX).updater(Updater.SGD).build())
- .setInputType(InputType.convolutionalFlat(28, 28, 1)).backprop(true)
- .pretrain(false).build();
+ new NeuralNetConfiguration.Builder()
+ .list().layer(0,
+ new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder(
+ 4, 4) // 28*28*1 => 15*15*10
+ .nIn(1).nOut(10).padding(2, 2)
+ .stride(2, 2)
+ .weightInit(WeightInit.RELU)
+ .activation(Activation.RELU)
+ .build())
+ .layer(1, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder()
+ .activation(Activation.RELU).nOut(200).build())
+ .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(200)
+ .nOut(5).weightInit(WeightInit.RELU)
+ .activation(Activation.SOFTMAX).build())
+ .setInputType(InputType.convolutionalFlat(28, 28, 1)).backprop(true)
+ .pretrain(false).build();
assertNotNull(conf.getInputPreProcess(0));
assertNotNull(conf.getInputPreProcess(1));
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/custom/MyCustomPreprocessor.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/custom/MyCustomPreprocessor.java
index c024619ef2b7..611ad9434ffa 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/custom/MyCustomPreprocessor.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/custom/MyCustomPreprocessor.java
@@ -1,11 +1,11 @@
package org.deeplearning4j.nn.conf.preprocessor.custom;
import lombok.EqualsAndHashCode;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/**
* Created by Alex on 09/09/2016.
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java
new file mode 100644
index 000000000000..041f8f29b70f
--- /dev/null
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java
@@ -0,0 +1,250 @@
+package org.deeplearning4j.nn.conf.weightnoise;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import org.deeplearning4j.TestUtils;
+import org.deeplearning4j.datasets.iterator.ExistingDataSetIterator;
+import org.deeplearning4j.nn.api.Layer;
+import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
+import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
+import org.deeplearning4j.nn.conf.layers.BaseLayer;
+import org.deeplearning4j.nn.conf.layers.DenseLayer;
+import org.deeplearning4j.nn.conf.layers.OutputLayer;
+import org.deeplearning4j.nn.graph.ComputationGraph;
+import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
+import org.deeplearning4j.nn.weights.WeightInit;
+import org.junit.Test;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.ops.impl.accum.MatchCondition;
+import org.nd4j.linalg.dataset.DataSet;
+import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.indexing.conditions.Conditions;
+import org.nd4j.linalg.schedule.ScheduleType;
+import org.nd4j.linalg.schedule.SigmoidSchedule;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.Assert.*;
+
+public class TestWeightNoise {
+
+ @Test
+ public void testWeightNoiseConfigJson() {
+ IWeightNoise[] weightNoises = new IWeightNoise[]{
+ new DropConnect(0.5),
+ new DropConnect(new SigmoidSchedule(ScheduleType.ITERATION, 0.5, 0.5, 100)),
+ new WeightNoise(new NormalDistribution(0, 0.1))
+ };
+
+ for (IWeightNoise wn : weightNoises) {
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .weightNoise(wn)
+ .list()
+ .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
+ .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(new DropConnect(0.25)).build())
+ .layer(new OutputLayer.Builder().nIn(10).nOut(10).build())
+ .build();
+
+ MultiLayerNetwork net = new MultiLayerNetwork(conf);
+ net.init();
+
+ assertEquals(wn, ((BaseLayer) net.getLayer(0).conf().getLayer()).getWeightNoise());
+ assertEquals(new DropConnect(0.25), ((BaseLayer) net.getLayer(1).conf().getLayer()).getWeightNoise());
+ assertEquals(wn, ((BaseLayer) net.getLayer(2).conf().getLayer()).getWeightNoise());
+
+ TestUtils.testModelSerialization(net);
+
+
+ ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
+ .weightNoise(wn)
+ .graphBuilder()
+ .addInputs("in")
+ .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
+ .layer("1", new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(new DropConnect(0.25)).build(), "0")
+ .layer("2", new OutputLayer.Builder().nIn(10).nOut(10).build(), "1")
+ .setOutputs("2")
+ .build();
+
+ ComputationGraph graph = new ComputationGraph(conf2);
+ graph.init();
+
+ assertEquals(wn, ((BaseLayer) graph.getLayer(0).conf().getLayer()).getWeightNoise());
+ assertEquals(new DropConnect(0.25), ((BaseLayer) graph.getLayer(1).conf().getLayer()).getWeightNoise());
+ assertEquals(wn, ((BaseLayer) graph.getLayer(2).conf().getLayer()).getWeightNoise());
+
+ TestUtils.testModelSerialization(graph);
+ }
+ }
+
+
+ @Test
+ public void testCalls() {
+
+ List<DataSet> trainData = new ArrayList<>();
+ trainData.add(new DataSet(Nd4j.rand(5, 10), Nd4j.rand(5, 10)));
+ trainData.add(new DataSet(Nd4j.rand(5, 10), Nd4j.rand(5, 10)));
+ trainData.add(new DataSet(Nd4j.rand(5, 10), Nd4j.rand(5, 10)));
+
+ List<List<WeightNoiseCall>> expCalls = new ArrayList<>();
+ for (int i = 0; i < 3; i++) {
+ List<WeightNoiseCall> expCallsForLayer = new ArrayList<>();
+ expCallsForLayer.add(new WeightNoiseCall(i, "W", 0, 0, true));
+ expCallsForLayer.add(new WeightNoiseCall(i, "b", 0, 0, true));
+ expCallsForLayer.add(new WeightNoiseCall(i, "W", 1, 0, true));
+ expCallsForLayer.add(new WeightNoiseCall(i, "b", 1, 0, true));
+ expCallsForLayer.add(new WeightNoiseCall(i, "W", 2, 0, true));
+ expCallsForLayer.add(new WeightNoiseCall(i, "b", 2, 0, true));
+ expCallsForLayer.add(new WeightNoiseCall(i, "W", 3, 1, true));
+ expCallsForLayer.add(new WeightNoiseCall(i, "b", 3, 1, true));
+ expCallsForLayer.add(new WeightNoiseCall(i, "W", 4, 1, true));
+ expCallsForLayer.add(new WeightNoiseCall(i, "b", 4, 1, true));
+ expCallsForLayer.add(new WeightNoiseCall(i, "W", 5, 1, true));
+ expCallsForLayer.add(new WeightNoiseCall(i, "b", 5, 1, true));
+
+ expCallsForLayer.add(new WeightNoiseCall(i, "W", 5, 1, false));
+ expCallsForLayer.add(new WeightNoiseCall(i, "b", 5, 1, false));
+
+ expCalls.add(expCallsForLayer);
+ }
+
+
+ CustomWeightNoise wn1 = new CustomWeightNoise();
+ CustomWeightNoise wn2 = new CustomWeightNoise();
+ CustomWeightNoise wn3 = new CustomWeightNoise();
+
+ List<CustomWeightNoise> list = Arrays.asList(wn1, wn2, wn3);
+
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .list()
+ .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn1).build())
+ .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn2).build())
+ .layer(new OutputLayer.Builder().nIn(10).nOut(10).weightNoise(wn3).build())
+ .build();
+
+ MultiLayerNetwork net = new MultiLayerNetwork(conf);
+ net.init();
+
+ net.fit(new ExistingDataSetIterator(trainData.iterator()));
+ net.fit(new ExistingDataSetIterator(trainData.iterator()));
+ net.output(trainData.get(0).getFeatures());
+
+ for (int i = 0; i < 3; i++) {
+ assertEquals(expCalls.get(i), list.get(i).getAllCalls());
+ }
+
+
+ wn1 = new CustomWeightNoise();
+ wn2 = new CustomWeightNoise();
+ wn3 = new CustomWeightNoise();
+ list = Arrays.asList(wn1, wn2, wn3);
+
+ ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
+ .graphBuilder()
+ .addInputs("in")
+ .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn1).build(), "in")
+ .layer("1", new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn2).build(), "0")
+ .layer("2", new OutputLayer.Builder().nIn(10).nOut(10).weightNoise(wn3).build(), "1")
+ .setOutputs("2")
+ .build();
+
+ ComputationGraph graph = new ComputationGraph(conf2);
+ graph.init();
+
+ int[] layerIdxs = new int[]{graph.getLayer(0).getIndex(), graph.getLayer(1).getIndex(), graph.getLayer(2).getIndex()};
+
+ expCalls.clear();
+ for (int i = 0; i < 3; i++) {
+ List<WeightNoiseCall> expCallsForLayer = new ArrayList<>();
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "W", 0, 0, true));
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 0, 0, true));
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "W", 1, 0, true));
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 1, 0, true));
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "W", 2, 0, true));
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 2, 0, true));
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "W", 3, 1, true));
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 3, 1, true));
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "W", 4, 1, true));
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 4, 1, true));
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "W", 5, 1, true));
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 5, 1, true));
+
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "W", 5, 1, false));
+ expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 5, 1, false));
+
+ expCalls.add(expCallsForLayer);
+ }
+
+ graph.fit(new ExistingDataSetIterator(trainData.iterator()));
+ graph.fit(new ExistingDataSetIterator(trainData.iterator()));
+ graph.output(trainData.get(0).getFeatures());
+
+ for (int i = 0; i < 3; i++) {
+ assertEquals(String.valueOf(i), expCalls.get(i), list.get(i).getAllCalls());
+ }
+
+ }
+
+ @Data
+ private static class CustomWeightNoise implements IWeightNoise {
+
+ private List<WeightNoiseCall> allCalls = new ArrayList<>();
+
+ @Override
+ public INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train) {
+ allCalls.add(new WeightNoiseCall(layer.getIndex(), paramKey, iteration, epoch, train));
+ return layer.getParam(paramKey);
+ }
+
+ @Override
+ public IWeightNoise clone() {
+ return new CustomWeightNoise();
+ }
+ }
+
+ @AllArgsConstructor
+ @Data
+ private static class WeightNoiseCall {
+ private int layerIdx;
+ private String paramKey;
+ private int iter;
+ private int epoch;
+ private boolean train;
+ }
+
+
+ @Test
+ public void testDropConnectValues() {
+ Nd4j.getRandom().setSeed(12345);
+
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .weightInit(WeightInit.ONES)
+ .list()
+ .layer(new OutputLayer.Builder().nIn(10).nOut(10).build())
+ .build();
+ MultiLayerNetwork net = new MultiLayerNetwork(conf);
+ net.init();
+
+ Layer l = net.getLayer(0);
+ DropConnect d = new DropConnect(0.5);
+
+ INDArray outTest = d.getParameter(l, "W", 0, 0, false);
+ assertTrue(l.getParam("W") == outTest); //Should be same object
+ INDArray outTrain = d.getParameter(l, "W", 0, 0, true);
+ assertNotEquals(l.getParam("W"), outTrain);
+
+ assertEquals(l.getParam("W"), Nd4j.ones(10, 10));
+
+ int countZeros = Nd4j.getExecutioner().exec(new MatchCondition(outTrain, Conditions.equals(0))).z().getInt(0);
+ int countOnes = Nd4j.getExecutioner().exec(new MatchCondition(outTrain, Conditions.equals(1))).z().getInt(0);
+
+ assertEquals(100, countZeros + countOnes); //Each value should be either 0 (dropped) or 1 (kept)
+ //Stochastic, but this should hold for most cases
+ assertTrue(countZeros >= 25 && countZeros <= 75);
+ assertTrue(countOnes >= 25 && countOnes <= 75);
+ }
+
+}
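
TestWeightNoise above is the coverage for the new IWeightNoise abstraction. For orientation, a configuration-level sketch of what the test exercises — a global weight-noise default plus a per-layer override — built only from classes the test itself uses; the wrapper class name is illustrative:

    import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.deeplearning4j.nn.conf.weightnoise.DropConnect;
    import org.deeplearning4j.nn.conf.weightnoise.WeightNoise;

    public class WeightNoiseSketch {
        static MultiLayerConfiguration build() {
            return new NeuralNetConfiguration.Builder()
                    .weightNoise(new DropConnect(0.5))          // global default: weights randomly zeroed at train time
                    .list()
                    .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())   // inherits DropConnect(0.5)
                    .layer(new DenseLayer.Builder().nIn(10).nOut(10)
                            .weightNoise(new WeightNoise(new NormalDistribution(0, 0.1)))  // per-layer override
                            .build())
                    .layer(new OutputLayer.Builder().nIn(10).nOut(10).build())  // inherits DropConnect(0.5)
                    .build();
        }
    }

As testDropConnectValues asserts, the noise is applied to a copy of the parameters at training time; the stored weights themselves are left untouched.
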
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java
index c8b60f2b8ac4..0251f3e14742 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java
@@ -1,6 +1,5 @@
package org.deeplearning4j.nn.graph;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.datasets.iterator.IteratorDataSetIterator;
import org.deeplearning4j.datasets.iterator.IteratorMultiDataSetIterator;
import org.deeplearning4j.nn.api.Layer;
@@ -27,6 +26,7 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.lossfunctions.LossFunctions;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Collections;
import java.util.Map;
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java
index 82334044749a..3317b7b7a56e 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java
@@ -1,6 +1,5 @@
package org.deeplearning4j.nn.graph;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.datasets.iterator.impl.ListDataSetIterator;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
@@ -18,6 +17,7 @@
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.Arrays;
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java
index 1855faf4e4b4..7c4e391a401d 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java
@@ -16,6 +16,7 @@
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.preprocessor.*;
+import org.deeplearning4j.nn.conf.weightnoise.DropConnect;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@@ -32,6 +33,8 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.io.ClassPathResource;
+import org.nd4j.linalg.learning.config.AdaGrad;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.primitives.Pair;
@@ -247,7 +250,7 @@ public void testIrisFitMultiDataSetIterator() throws Exception {
.addInput("iris", 0, 3).addOutputOneHot("iris", 4, 3).build();
ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.1)
+ .updater(new Sgd(0.1))
.graphBuilder().addInputs("in")
.addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out",
new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3)
@@ -499,7 +502,7 @@ public void testPreTraining() {
ComputationGraphConfiguration conf =
new NeuralNetConfiguration.Builder().iterations(100)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .iterations(1).updater(Updater.SGD).learningRate(1e-6).regularization(true)
+ .iterations(1).updater(new Sgd(1e-6))
.l2(2e-4).graphBuilder().addInputs("in")
.addLayer("layer0",
new RBM.Builder(RBM.HiddenUnit.GAUSSIAN,
@@ -554,8 +557,9 @@ public void testScoreExamples() {
int nIn = 5;
int nOut = 6;
ComputationGraphConfiguration conf =
- new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l1(0.01).l2(0.01)
- .learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER)
+ new NeuralNetConfiguration.Builder().seed(12345).l1(0.01).l2(0.01)
+ .updater(new Sgd(0.1))
+ .activation(Activation.TANH).weightInit(WeightInit.XAVIER)
.graphBuilder().addInputs("in")
.addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(20).build(), "in")
.addLayer("1", new DenseLayer.Builder().nIn(20).nOut(30).build(), "0")
@@ -565,7 +569,7 @@ public void testScoreExamples() {
.setOutputs("2").build();
ComputationGraphConfiguration confNoReg =
- new NeuralNetConfiguration.Builder().seed(12345).learningRate(0.1).activation(Activation.TANH)
+ new NeuralNetConfiguration.Builder().seed(12345).updater(new Sgd(0.1)).activation(Activation.TANH)
.weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
.addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(20).build(), "in")
.addLayer("1", new DenseLayer.Builder().nIn(20).nOut(30).build(), "0")
@@ -619,8 +623,8 @@ public void testExternalErrors() {
INDArray outData = Nd4j.rand(3, 10);
Nd4j.getRandom().setSeed(12345);
- ComputationGraphConfiguration standard = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .updater(Updater.SGD).seed(12345).graphBuilder().addInputs("in")
+ ComputationGraphConfiguration standard = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
+ .seed(12345).graphBuilder().addInputs("in")
.addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
.addLayer("out", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10)
.nOut(10).build(), "l0")
@@ -630,8 +634,8 @@ public void testExternalErrors() {
Nd4j.getRandom().setSeed(12345);
- ComputationGraphConfiguration external = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .updater(Updater.SGD).seed(12345).graphBuilder().addInputs("in")
+ ComputationGraphConfiguration external = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
+ .seed(12345).graphBuilder().addInputs("in")
.addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").setOutputs("l0")
.pretrain(false).backprop(true).build();
@@ -903,9 +907,8 @@ public void testIterationCountAndPresistence() throws IOException {
@Test
public void printSummary() {
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .activation(Activation.IDENTITY)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
+ .activation(Activation.IDENTITY);
ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight")
.addLayer("denseCentre0", new DenseLayer.Builder().nIn(10).nOut(9).build(), "inCentre")
@@ -982,7 +985,7 @@ public void testSetOutputsMultipleCalls() {
public void testDropoutValidation() {
//At one point: this threw an exception due to incorrect validation
for (boolean dropConnect : new boolean[]{false, true}) {
- new NeuralNetConfiguration.Builder().regularization(true).useDropConnect(dropConnect).dropOut(0.5)
+ new NeuralNetConfiguration.Builder().weightNoise(new DropConnect(0.5))
.graphBuilder().setInputTypes(InputType.feedForward(1)).addInputs("input1")
.addLayer("output",
new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(1).nOut(1)
@@ -998,7 +1001,7 @@ public void testNoParamLayersL1L2() {
//Don't care about this being valid
ComputationGraphConfiguration c =
- new NeuralNetConfiguration.Builder().regularization(true).l1(0.5).l2(0.6).graphBuilder()
+ new NeuralNetConfiguration.Builder().l1(0.5).l2(0.6).graphBuilder()
.addInputs("in")
.addLayer("sub1", new SubsamplingLayer.Builder(2, 2).build(), "in")
.addLayer("sub2", new Subsampling1DLayer.Builder(2).build(), "sub1")
@@ -1147,35 +1150,29 @@ public void testSummary() {
int V_HEIGHT = 130;
int V_NFRAMES = 150;
ComputationGraphConfiguration confForArchitecture =
- new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l2(0.001) //l2 regularization on all layers
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .iterations(1).learningRate(0.4).graphBuilder()
+ new NeuralNetConfiguration.Builder().seed(12345).l2(0.001) //l2 regularization on all layers
+ .updater(new AdaGrad(0.4)).graphBuilder()
.addInputs("in")
.addLayer("layer0", new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB
.nOut(30).stride(4, 4).activation(Activation.RELU).weightInit(
- WeightInit.RELU)
- .updater(Updater.ADAGRAD).build(),"in") //Output: (130-10+0)/4+1 = 31 -> 31*31*30
+ WeightInit.RELU).build(),"in") //Output: (130-10+0)/4+1 = 31 -> 31*31*30
.addLayer("layer1", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
.kernelSize(3, 3).stride(2, 2).build(),"layer0") //(31-3+0)/2+1 = 15
.addLayer("layer2", new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2)
.activation(Activation.RELU).weightInit(WeightInit.RELU)
.updater(Updater.ADAGRAD).build(), "layer1") //Output: (15-3+0)/2+1 = 7 -> 7*7*10 = 490
.addLayer("layer3", new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50)
- .weightInit(WeightInit.RELU).updater(Updater.ADAGRAD)
- .gradientNormalization(
- GradientNormalization.ClipElementWiseAbsoluteValue)
- .gradientNormalizationThreshold(10).learningRate(0.5).build(), "layer2")
+ .weightInit(WeightInit.RELU).gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
+ .gradientNormalizationThreshold(10).build(), "layer2")
.addLayer("layer4", new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50)
.nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD)
- .gradientNormalization(
- GradientNormalization.ClipElementWiseAbsoluteValue)
- .gradientNormalizationThreshold(10).learningRate(0.6)
+ .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
+ .gradientNormalizationThreshold(10)
.build(), "layer3")
.addLayer("layer5", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(50).nOut(4) //4 possible shapes: circle, square, arc, line
- .updater(Updater.ADAGRAD).weightInit(WeightInit.XAVIER)
- .gradientNormalization(
- GradientNormalization.ClipElementWiseAbsoluteValue)
+ .weightInit(WeightInit.XAVIER)
+ .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
.gradientNormalizationThreshold(10).build(), "layer4")
.setOutputs("layer5")
.inputPreProcessor("layer0", new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3))
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java
index 0d9a59f06f80..676aff406dfd 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java
@@ -3,7 +3,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
@@ -18,6 +17,8 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.learning.config.NoOp;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.Map;
@@ -45,7 +46,7 @@ public void testVariableLengthSimple() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(0.1).seed(12345).graphBuilder().addInputs("in")
+ .updater(new Sgd(0.1)).seed(12345).graphBuilder().addInputs("in")
.addLayer("0", new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(),
"in")
.addLayer("1", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE)
@@ -134,7 +135,7 @@ public void testInputMasking() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(0.1).seed(12345).graphBuilder().addInputs("in")
+ .updater(new Sgd(0.1)).seed(12345).graphBuilder().addInputs("in")
.addLayer("0", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(),
"in")
.addLayer("1", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(),
@@ -270,21 +271,21 @@ public void testOutputMaskingScoreMagnitudes() {
INDArray labels = Nd4j.ones(miniBatch, nOut, tsLength);
ComputationGraphConfiguration conf =
- new NeuralNetConfiguration.Builder().regularization(false).seed(12345L)
+ new NeuralNetConfiguration.Builder().seed(12345L)
.graphBuilder()
.addInputs("in").addLayer("0",
new GravesLSTM.Builder().nIn(nIn).nOut(5)
.weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0,
1))
- .updater(Updater.NONE).build(),
+ .updater(new NoOp()).build(),
"in")
.addLayer("1", new RnnOutputLayer.Builder(
LossFunctions.LossFunction.MSE)
.activation(Activation.IDENTITY)
.nIn(5).nOut(nOut)
.weightInit(WeightInit.ZERO)
- .updater(Updater.NONE).build(),
+ .updater(new NoOp()).build(),
"0")
.setOutputs("1").pretrain(false).backprop(true).build();
ComputationGraph net = new ComputationGraph(conf);
@@ -338,42 +339,42 @@ public void testOutputMasking() {
INDArray input = Nd4j.rand(new int[] {miniBatch, nIn, tsLength});
ComputationGraphConfiguration conf =
- new NeuralNetConfiguration.Builder().regularization(false).seed(12345L)
+ new NeuralNetConfiguration.Builder().seed(12345L)
.graphBuilder()
.addInputs("in").addLayer("0",
new GravesLSTM.Builder().nIn(nIn).nOut(5)
.weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0,
1))
- .updater(Updater.NONE).build(),
+ .updater(new NoOp()).build(),
"in")
.addLayer("1", new RnnOutputLayer.Builder(
LossFunctions.LossFunction.MSE)
.activation(Activation.IDENTITY)
.nIn(5).nOut(nOut)
.weightInit(WeightInit.XAVIER)
- .updater(Updater.NONE).build(),
+ .updater(new NoOp()).build(),
"0")
.setOutputs("1").pretrain(false).backprop(true).build();
ComputationGraph net = new ComputationGraph(conf);
net.init();
ComputationGraphConfiguration conf2 =
- new NeuralNetConfiguration.Builder().regularization(false).seed(12345L)
+ new NeuralNetConfiguration.Builder().seed(12345L)
.graphBuilder()
.addInputs("in").addLayer("0",
new GravesLSTM.Builder().nIn(nIn).nOut(5)
.weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0,
1))
- .updater(Updater.NONE).build(),
+ .updater(new NoOp()).build(),
"in")
.addLayer("1", new RnnOutputLayer.Builder(
LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nIn(5).nOut(nOut)
.weightInit(WeightInit.XAVIER)
- .updater(Updater.NONE).build(),
+ .updater(new NoOp()).build(),
"0")
.setOutputs("1").pretrain(false).backprop(true).build();
ComputationGraph net2 = new ComputationGraph(conf2);
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java
index e6723ddf6592..51c4bd86cd1c 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java
@@ -1,27 +1,25 @@
package org.deeplearning4j.nn.graph.graphnodes;
-import org.deeplearning4j.nn.conf.WorkspaceMode;
-import org.deeplearning4j.nn.conf.inputs.InputType;
-import org.deeplearning4j.nn.conf.layers.GravesLSTM;
-import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
-import org.deeplearning4j.nn.transferlearning.TransferLearning;
-import org.deeplearning4j.nn.weights.WeightInit;
-import org.nd4j.linalg.learning.config.AdaDelta;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.WorkspaceMode;
import org.deeplearning4j.nn.conf.graph.ElementWiseVertex;
import org.deeplearning4j.nn.conf.graph.PreprocessorVertex;
import org.deeplearning4j.nn.conf.graph.rnn.DuplicateToTimeSeriesVertex;
import org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex;
+import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.EmbeddingLayer;
+import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
+import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.graph.vertex.GraphVertex;
import org.deeplearning4j.nn.graph.vertex.impl.*;
+import org.deeplearning4j.nn.transferlearning.TransferLearning;
+import org.deeplearning4j.nn.weights.WeightInit;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
@@ -29,14 +27,14 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.learning.config.AdaDelta;
import org.nd4j.linalg.lossfunctions.LossFunctions;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
import java.util.Map;
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.*;
public class TestGraphNodes {
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java
index e0192b87edb9..3ce8938d6930 100755
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java
@@ -22,7 +22,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer;
@@ -38,6 +37,8 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Nesterovs;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
import org.slf4j.Logger;
@@ -59,8 +60,8 @@ private ComputationGraph getGraph(int numLabels, double lambda) {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE)
- .learningRate(1.0).graphBuilder().addInputs("input1")
+ .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(new NoOp())
+ .graphBuilder().addInputs("input1")
.addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.RELU).build(),
"input1")
.addLayer("lossLayer", new CenterLossOutputLayer.Builder()
@@ -80,9 +81,9 @@ public ComputationGraph getCNNMnistConfig() {
int outputNum = 10; // The number of possible outcomes
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).iterations(1) // Training iterations as above
- .regularization(true).l2(0.0005).learningRate(0.01).weightInit(WeightInit.XAVIER)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NESTEROVS)
- .momentum(0.9).graphBuilder().addInputs("input")
+ .l2(0.0005).weightInit(WeightInit.XAVIER)
+ .updater(new Nesterovs(0.01, 0.9))
+ .graphBuilder().addInputs("input")
.setInputTypes(InputType.convolutionalFlat(28, 28, 1))
.addLayer("0", new ConvolutionLayer.Builder(5, 5)
//nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java
index 2e54bbc9bb5f..d934397e79e6 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java
@@ -1,11 +1,9 @@
package org.deeplearning4j.nn.layers;
import lombok.extern.slf4j.Slf4j;
-import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@@ -17,6 +15,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.List;
@@ -37,11 +36,10 @@ public class FrozenLayerTest {
public void testFrozen() {
DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3));
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
.activation(Activation.IDENTITY);
- FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().learningRate(0.1).build();
+ FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build();
MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(overallConf.clone().list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
@@ -93,8 +91,7 @@ public void cloneMLNFrozen() {
DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3));
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
.activation(Activation.IDENTITY);
MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(overallConf.list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
@@ -146,8 +143,7 @@ public void cloneMLNFrozen() {
public void testFrozenCompGraph() {
DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3));
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
.activation(Activation.IDENTITY);
ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In")
@@ -198,8 +194,7 @@ public void cloneCompGraphFrozen() {
DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3));
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
.activation(Activation.IDENTITY);
ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In")
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java
index fc2cf66d43ef..b3aba35968c1 100755
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java
@@ -23,7 +23,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor;
@@ -42,6 +41,9 @@
import org.nd4j.linalg.dataset.SplitTestAndTrain;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.AdaGrad;
+import org.nd4j.linalg.learning.config.NoOp;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
import org.slf4j.Logger;
@@ -63,7 +65,7 @@ public class OutputLayerTest {
public void testIris2() {
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(10)
- .learningRate(1e-1)
+ .updater(new Sgd(1e-1))
.layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(4).nOut(3)
.weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
@@ -126,10 +128,10 @@ public void testWeightsDifferent() {
Nd4j.MAX_SLICES_TO_PRINT = Integer.MAX_VALUE;
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).miniBatch(false).seed(123)
- .iterations(1000).learningRate(1e-1)
+ .miniBatch(false).seed(123)
+ .iterations(1000).updater(new AdaGrad(1e-1))
.layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(4).nOut(3)
- .weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD)
+ .weightInit(WeightInit.XAVIER)
.lossFunction(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
.activation(Activation.SOFTMAX).build())
.build();
@@ -172,10 +174,10 @@ public void testBinary() {
DataSet dataset = new DataSet(data, data2);
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123).iterations(200)
- .learningRate(1e-2)
+ .seed(123).iterations(200)
+ .updater(new Sgd(1e-2))
.layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(6).nOut(2)
- .weightInit(WeightInit.ZERO).updater(Updater.SGD).activation(Activation.SOFTMAX)
+ .weightInit(WeightInit.ZERO).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).build())
.build();
@@ -194,7 +196,7 @@ public void testBinary() {
@Test
public void testIris() {
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(5).learningRate(1e-1)
+ .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(5).updater(new Sgd(1e-1))
.layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(4).nOut(3)
.weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
@@ -229,7 +231,7 @@ public void testIris() {
public void testSetParams() {
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(100)
- .learningRate(1e-1)
+ .updater(new Sgd(1e-1))
.layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(4).nOut(3)
.weightInit(WeightInit.ZERO).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
@@ -267,11 +269,11 @@ public void testOutputLayersRnnForwardPass() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1)).activation(Activation.TANH)
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.inputPreProcessor(1, new RnnToFeedForwardPreProcessor()).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
@@ -294,11 +296,11 @@ public void testOutputLayersRnnForwardPass() {
MultiLayerConfiguration confRnn = new NeuralNetConfiguration.Builder().seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1)).activation(Activation.TANH)
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.build();
MultiLayerNetwork mlnRnn = new MultiLayerNetwork(confRnn);
@@ -354,11 +356,11 @@ public void testRnnOutputLayerIncEdgeCases() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .activation(Activation.TANH).updater(Updater.NONE).build())
+ .activation(Activation.TANH).updater(new NoOp()).build())
.layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.inputPreProcessor(1, new RnnToFeedForwardPreProcessor()).pretrain(false).backprop(true)
.build();
@@ -371,11 +373,11 @@ public void testRnnOutputLayerIncEdgeCases() {
MultiLayerConfiguration confRnn = new NeuralNetConfiguration.Builder().seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .activation(Activation.TANH).updater(Updater.NONE).build())
+ .activation(Activation.TANH).updater(new NoOp()).build())
.layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork mlnRnn = new MultiLayerNetwork(confRnn);
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java
index 615eb0907612..a860a20dcb9f 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java
@@ -1,9 +1,7 @@
package org.deeplearning4j.nn.layers;
-import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
@@ -16,6 +14,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.lang.reflect.Field;
@@ -35,8 +34,8 @@ public void testDropoutSimple() throws Exception {
int nOut = 8;
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
- .iterations(1).regularization(true).dropOut(0.5).list()
+ .updater(new Sgd())
+ .iterations(1).dropOut(0.5).list()
.layer(0, new OutputLayer.Builder().activation(Activation.IDENTITY)
.lossFunction(LossFunctions.LossFunction.MSE).nIn(nIn).nOut(nOut)
.weightInit(WeightInit.XAVIER).build())
@@ -110,8 +109,7 @@ public void testDropoutMultiLayer() throws Exception {
int nOut = 4;
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
- .iterations(1).regularization(true).dropOut(0.5).learningRate(1e-9)
+ .iterations(1).dropOut(0.5).updater(new Sgd(1e-9))
.weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(10, 11)) //Weight init to cause sigmoid saturation
.list()
.layer(0, new DenseLayer.Builder().activation(Activation.SIGMOID).nIn(nIn).nOut(layerSize)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java
index 491cfabfd69f..36b91e2d8c1e 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java
@@ -62,7 +62,7 @@ public void testDenseToOutputLayer() {
//setup the network
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations)
- .regularization(true).l1(1e-1).l2(2e-4).useDropConnect(true).dropOut(0.5).miniBatch(true)
+ .l1(1e-1).l2(2e-4).dropOut(0.5).miniBatch(true)
.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list()
.layer(0, new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER)
.activation(Activation.RELU).build())
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java
index 8597bc9d73b7..20cf2e83093e 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java
@@ -7,7 +7,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
@@ -29,6 +28,7 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import static org.junit.Assert.*;
@@ -49,7 +49,7 @@ public void before() {
public void testTwdFirstLayer() throws Exception {
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(123).iterations(5)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4)
- .regularization(true).momentum(0.9).updater(Updater.NESTEROVS).useDropConnect(true).dropOut(0.5)
+ .updater(new Nesterovs(0.9)).dropOut(0.5)
.list().layer(0,
new ConvolutionLayer.Builder(8, 8) //16 filters kernel size 8 stride 4
.stride(4, 4).nOut(16).dropOut(0.5)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java
index 668dce641161..a5ce0522fa6e 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java
@@ -1,6 +1,5 @@
package org.deeplearning4j.nn.layers.convolution;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.GradientNormalization;
@@ -20,6 +19,7 @@
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java
new file mode 100644
index 000000000000..f6fef11eaa5f
--- /dev/null
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java
@@ -0,0 +1,110 @@
+package org.deeplearning4j.nn.layers.convolution;
+
+import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
+import org.deeplearning4j.nn.api.Layer;
+import org.deeplearning4j.nn.conf.GradientNormalization;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.layers.Upsampling1D;
+import org.deeplearning4j.nn.gradient.Gradient;
+import org.junit.Test;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.dataset.DataSet;
+import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
+import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
+
+import java.util.Arrays;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author Max Pumperla
+ */
+public class Upsampling1DTest {
+
+ private int nExamples = 1;
+ private int depth = 20;
+ private int nChannelsIn = 1;
+ private int inputLength = 28;
+ private int size = 2;
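+    // upsampling factor of 2: output sequence length = inputLength * size = 56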
+ private int outputLength = inputLength * size;
+ private INDArray epsilon = Nd4j.ones(nExamples, depth, outputLength);
+
+
+ @Test
+ public void testUpsampling1D() throws Exception {
+
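+        // forward pass repeats each input element size=2 times along the length axis: [1,2,3,4] -> [1,1,2,2,3,3,4,4]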
+ double[] outArray = new double[] {1., 1., 2., 2., 3., 3., 4., 4.};
+ INDArray containedExpectedOut = Nd4j.create(outArray, new int[] {1, 1, 8});
+ INDArray containedInput = getContainedData();
+ INDArray input = getData();
+ Layer layer = getUpsampling1DLayer();
+
+ INDArray containedOutput = layer.activate(containedInput);
+ assertTrue(Arrays.equals(containedExpectedOut.shape(), containedOutput.shape()));
+ assertEquals(containedExpectedOut, containedOutput);
+
+ INDArray output = layer.activate(input);
+ assertTrue(Arrays.equals(new int[] {nExamples, nChannelsIn, outputLength},
+ output.shape()));
+ assertEquals(nChannelsIn, output.size(1), 1e-4);
+ }
+
+
+ @Test
+ public void testUpsampling1DBackprop() throws Exception {
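+        // backprop through upsampling sums epsilons over each repeated block of size 2: {1+3, 2+6, 7+2, 5+5} -> {4, 8, 9, 10}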
+ INDArray expectedContainedEpsilonInput =
+ Nd4j.create(new double[] {1., 3., 2., 6., 7., 2., 5., 5.},
+ new int[] {1, 1, 8});
+
+ INDArray expectedContainedEpsilonResult = Nd4j.create(new double[] {4., 8., 9., 10.},
+ new int[] {1, 1, 4});
+
+ INDArray input = getContainedData();
+
+ Layer layer = getUpsampling1DLayer();
+ layer.activate(input);
+
+        Pair<Gradient, INDArray> containedOutput = layer.backpropGradient(expectedContainedEpsilonInput);
+
+ assertEquals(expectedContainedEpsilonResult, containedOutput.getSecond());
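+        // upsampling has no trainable parameters, so no gradient should exist for "W"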
+ assertEquals(null, containedOutput.getFirst().getGradientFor("W"));
+ assertEquals(expectedContainedEpsilonResult.shape().length, containedOutput.getSecond().shape().length);
+
+ INDArray input2 = getData();
+ layer.activate(input2);
+ int depth = input2.size(1);
+
+ epsilon = Nd4j.ones(5, depth, outputLength);
+
+        Pair<Gradient, INDArray> out = layer.backpropGradient(epsilon);
+ assertEquals(input.shape().length, out.getSecond().shape().length);
+ assertEquals(depth, out.getSecond().size(1));
+ }
+
+
+ private Layer getUpsampling1DLayer() {
+ NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
+ .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123)
+ .layer(new Upsampling1D.Builder(size).build()).build();
+ return conf.getLayer().instantiate(conf, null, 0,
+ null, true);
+ }
+
+ public INDArray getData() throws Exception {
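+        // draw a small MNIST batch, reshape to (examples, channels, height, width), then take slice 0 along the last dimension to get 3D (examples, channels, length) input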
+ DataSetIterator data = new MnistDataSetIterator(5, 5);
+ DataSet mnist = data.next();
+ nExamples = mnist.numExamples();
+ INDArray features = mnist.getFeatureMatrix().reshape(nExamples, nChannelsIn, inputLength, inputLength);
+ return features.slice(0, 3);
+ }
+
+ private INDArray getContainedData() {
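+        // fixed 1x1x4 input [1,2,3,4] so activations and gradients can be checked exactly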
+ INDArray ret = Nd4j.create
+ (new double[] {1., 2., 3., 4.},
+ new int[] {1, 1, 4});
+ return ret;
+ }
+
+}
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java
index e3abe33f2252..2b04a974927e 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java
@@ -3,17 +3,10 @@
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.GradientNormalization;
-import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.inputs.InputType;
-import org.deeplearning4j.nn.conf.layers.OutputLayer;
-import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
import org.deeplearning4j.nn.conf.layers.Upsampling2D;
import org.deeplearning4j.nn.gradient.Gradient;
-import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
-import org.deeplearning4j.nn.weights.WeightInit;
import org.junit.Test;
-import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java
index da6b9fdae43a..17ae80244edb 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java
@@ -7,6 +7,7 @@
import org.deeplearning4j.nn.layers.custom.testclasses.CustomActivation;
import org.junit.Test;
import org.nd4j.linalg.activations.IActivation;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.shade.jackson.databind.ObjectMapper;
import org.nd4j.shade.jackson.databind.introspect.AnnotatedClass;
@@ -43,7 +44,7 @@ public void testCustomActivationFn() {
//Second: let's create a MultiLayerConfiguration with one, and check JSON and YAML config actually works...
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.1).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)).list()
.layer(0, new DenseLayer.Builder().nIn(10).nOut(10).activation(new CustomActivation()).build())
.layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10).nOut(10).build())
.pretrain(false).backprop(true).build();
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java
index 0712cb7830bf..446b9c7eb28e 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java
@@ -72,7 +72,7 @@ public void testJsonMultiLayerNetwork() {
//Second: let's create a MultiLayerConfiguration with one, and check JSON and YAML config actually works...
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(0.1).list()
+ new NeuralNetConfiguration.Builder().list()
.layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
.layer(1, new CustomLayer(3.14159)).layer(2,
new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
@@ -95,7 +95,7 @@ public void testJsonMultiLayerNetwork() {
public void testJsonComputationGraph() {
//ComputationGraph with a custom layer; check JSON and YAML config actually works...
- ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.1).graphBuilder()
+ ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder()
.addInputs("in").addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
.addLayer("1", new CustomLayer(3.14159), "0").addLayer("2",
new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10).nOut(10)
@@ -120,7 +120,7 @@ public void testJsonComputationGraph() {
public void checkInitializationFF() {
//Actually create a network with a custom layer; check initialization and forward pass
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.1).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list()
.layer(0, new DenseLayer.Builder().nIn(9).nOut(10).build()).layer(1, new CustomLayer(3.14159)) //hard-coded nIn/nOut of 10
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10).nOut(11).build())
.pretrain(false).backprop(true).build();
@@ -161,7 +161,7 @@ public void testCustomOutputLayerMLN() {
//Second: let's create a MultiLayerConfiguration with one, and check JSON and YAML config actually works...
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().seed(12345).learningRate(0.1).list()
+ new NeuralNetConfiguration.Builder().seed(12345).list()
.layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
.layer(1, new CustomOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.nIn(10).nOut(10).build())
@@ -187,7 +187,7 @@ public void testCustomOutputLayerMLN() {
//Fourth: compare to an equivalent standard output layer (should be identical)
MultiLayerConfiguration conf2 =
- new NeuralNetConfiguration.Builder().seed(12345).learningRate(0.1).weightInit(WeightInit.XAVIER)
+ new NeuralNetConfiguration.Builder().seed(12345).weightInit(WeightInit.XAVIER)
.list()
.layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1,
new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
@@ -212,7 +212,7 @@ public void testCustomOutputLayerMLN() {
@Test
public void testCustomOutputLayerCG() {
//Create a ComputationGraphConfiguration with custom output layer, and check JSON and YAML config actually works...
- ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).learningRate(0.1)
+ ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.graphBuilder().addInputs("in")
.addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("1",
new CustomOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10)
@@ -239,7 +239,7 @@ public void testCustomOutputLayerCG() {
assertTrue(net.getLayer(1) instanceof CustomOutputLayerImpl);
//Fourth: compare to an equivalent standard output layer (should be identical)
- ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345).learningRate(0.1)
+ ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345)
.graphBuilder().addInputs("in")
.addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("1",
new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10).nOut(10)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomActivation.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomActivation.java
index 174f65348ddb..283068dbcfe0 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomActivation.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomActivation.java
@@ -1,10 +1,10 @@
package org.deeplearning4j.nn.layers.custom.testclasses;
import lombok.EqualsAndHashCode;
-import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.activations.BaseActivationFunction;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/**
* Created by Alex on 19/12/2016.
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoderTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoderTest.java
index 4dc2f5e185f6..f7e38a25303e 100755
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoderTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoderTest.java
@@ -31,6 +31,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.Arrays;
@@ -59,8 +60,8 @@ public void testAutoEncoderBiasInit() {
public void testAutoEncoder() throws Exception {
MnistDataFetcher fetcher = new MnistDataFetcher(true);
- NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().momentum(0.9f)
- .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(1).learningRate(1e-1f)
+ NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
+ .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(1).updater(new Sgd(0.1))
.layer(new org.deeplearning4j.nn.conf.layers.AutoEncoder.Builder().nIn(784).nOut(600)
.corruptionLevel(0.6)
.lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY).build())
@@ -88,9 +89,9 @@ public void testAutoEncoder() throws Exception {
public void testBackProp() throws Exception {
MnistDataFetcher fetcher = new MnistDataFetcher(true);
// LayerFactory layerFactory = LayerFactories.getFactory(new org.deeplearning4j.nn.conf.layers.AutoEncoder());
- NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().momentum(0.9f)
+ NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(100)
- .learningRate(1e-1f)
+ .updater(new Sgd(0.1))
.layer(new org.deeplearning4j.nn.conf.layers.AutoEncoder.Builder().nIn(784).nOut(600)
.corruptionLevel(0.6)
.lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY).build())
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java
index b00daf2f9492..2d4e5587379d 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java
@@ -15,6 +15,7 @@
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import static org.junit.Assert.assertEquals;
@@ -107,7 +108,7 @@ private static MultiLayerNetwork getDenseMLNConfig(boolean backprop, boolean pre
long seed = 6;
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations)
- .learningRate(1e-3).l1(0.3).regularization(true).l2(1e-3).list()
+ .updater(new Sgd(1e-3)).l1(0.3).l2(1e-3).list()
.layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(numInputs).nOut(3)
.activation(Activation.TANH).weightInit(WeightInit.XAVIER).build())
.layer(1, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(3).nOut(2)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java
index 2e285f8b5a67..2174f5ff47f2 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java
@@ -4,7 +4,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.EmbeddingLayer;
import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor;
@@ -16,6 +15,7 @@
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.List;
@@ -244,7 +244,7 @@ public void testEmbeddingLayerWithMasking() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(0.1).seed(12345).list()
+ .updater(new Sgd(0.1)).seed(12345).list()
.layer(0, new EmbeddingLayer.Builder().hasBias(true).activation(Activation.TANH).nIn(numInputClasses)
.nOut(5).build())
.layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build())
@@ -259,7 +259,7 @@ public void testEmbeddingLayerWithMasking() {
MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(0.1).seed(12345).list()
+ .updater(new Sgd(0.1)).seed(12345).list()
.layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5)
.build())
.layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build())
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBMTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBMTests.java
index 72be254a3bfc..9ff3113c13af 100755
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBMTests.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBMTests.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.layers.feedforward.rbm;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.datasets.fetchers.IrisDataFetcher;
import org.deeplearning4j.datasets.fetchers.MnistDataFetcher;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
@@ -26,7 +25,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.RBM.HiddenUnit;
@@ -47,8 +45,11 @@
import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -130,7 +131,7 @@ public void testMnist() throws Exception {
Nd4j.ENFORCE_NUMERICAL_STABILITY = true;
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().iterations(30)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1e-1f)
+ .updater(new Sgd(0.1))
.layer(new org.deeplearning4j.nn.conf.layers.RBM.Builder().nIn(784).nOut(600)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(1, 1e-5))
.lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY).build())
@@ -355,7 +356,7 @@ private static RBM getRBMLayer(int nIn, int nOut, HiddenUnit hiddenUnit, Visible
int learningRate) {
org.deeplearning4j.nn.conf.layers.RBM layer =
new org.deeplearning4j.nn.conf.layers.RBM.Builder(hiddenUnit, visibleUnit).nIn(nIn).nOut(nOut)
- .learningRate(learningRate).lossFunction(lossFunctions).build();
+ .updater(new Sgd(learningRate)).lossFunction(lossFunctions).build();
NeuralNetConfiguration conf =
new NeuralNetConfiguration.Builder().iterations(iterations).seed(42).layer(layer).build();
@@ -368,7 +369,7 @@ private static RBM getRBMLayer(int nIn, int nOut, HiddenUnit hiddenUnit, Visible
boolean pretrain, boolean initialize, int iterations, LossFunctions.LossFunction lossFunctions) {
org.deeplearning4j.nn.conf.layers.RBM layer =
new org.deeplearning4j.nn.conf.layers.RBM.Builder(hiddenUnit, visibleUnit).nIn(nIn).nOut(nOut)
- .learningRate(1e-1f).lossFunction(lossFunctions).build();
+ .updater(new Sgd(1e-1f)).lossFunction(lossFunctions).build();
NeuralNetConfiguration conf =
new NeuralNetConfiguration.Builder().iterations(iterations).seed(42).layer(layer).build();
@@ -380,8 +381,8 @@ private static RBM getRBMLayer(int nIn, int nOut, HiddenUnit hiddenUnit, Visible
private static MultiLayerNetwork getRBMMLNNet(boolean backprop, boolean pretrain, INDArray input, int nOut1,
int nOut2, WeightInit weightInit) {
MultiLayerConfiguration rbm = new NeuralNetConfiguration.Builder().seed(0xDEADBEEF).iterations(1000).biasInit(0)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NONE)
- .epsilon(1).weightInit(weightInit)
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new NoOp())
+ .weightInit(weightInit)
.list(new org.deeplearning4j.nn.conf.layers.RBM.Builder(HiddenUnit.BINARY, VisibleUnit.BINARY)
.lossFunction(LossFunctions.LossFunction.MSE).nOut(nOut1).build(),
new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
@@ -398,8 +399,8 @@ private static MultiLayerNetwork getRBMMLNNet(boolean backprop, boolean pretrain
private static MultiLayerNetwork getMultiLayerRBMNet(boolean backprop, boolean pretrain, INDArray input, int nOut1,
int nOut2, int nOut3, WeightInit weightInit) {
MultiLayerConfiguration rbm = new NeuralNetConfiguration.Builder().seed(0xDEADBEEF).iterations(1000).biasInit(0)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NONE)
- .epsilon(1).weightInit(weightInit)
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new NoOp())
+ .weightInit(weightInit)
.list(new org.deeplearning4j.nn.conf.layers.RBM.Builder()
.lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).nOut(nOut1).build(),
new org.deeplearning4j.nn.conf.layers.RBM.Builder()
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java
index 88a991bd3ee6..a2e49453c654 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java
@@ -1,6 +1,6 @@
package org.deeplearning4j.nn.layers.normalization;
-import org.nd4j.linalg.primitives.Pair;
+import org.deeplearning4j.TestUtils;
import org.deeplearning4j.datasets.iterator.impl.ListDataSetIterator;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.api.Layer;
@@ -17,7 +17,6 @@
import org.deeplearning4j.nn.updater.MultiLayerUpdater;
import org.deeplearning4j.nn.updater.UpdaterBlock;
import org.deeplearning4j.nn.weights.WeightInit;
-import org.deeplearning4j.util.ModelSerializer;
import org.junit.Before;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
@@ -36,9 +35,8 @@
import org.nd4j.linalg.learning.RmsPropUpdater;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Pair;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -394,7 +392,7 @@ public void checkSerialization() throws Exception {
// i.e., make sure state is properly stored
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(2).seed(12345)
+ .iterations(2).seed(12345)
.list()
.layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER)
.activation(Activation.IDENTITY).build())
@@ -421,13 +419,7 @@ public void checkSerialization() throws Exception {
assertEquals(out, out2);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ModelSerializer.writeModel(net, baos, true);
- baos.close();
- byte[] bArr = baos.toByteArray();
-
- ByteArrayInputStream bais = new ByteArrayInputStream(bArr);
- MultiLayerNetwork net2 = ModelSerializer.restoreMultiLayerNetwork(bais, true);
+ MultiLayerNetwork net2 = TestUtils.testModelSerialization(net);
INDArray outDeser = net2.output(in, false);
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java
index 1fcab6f70184..b86ae4f42403 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java
@@ -1,6 +1,5 @@
package org.deeplearning4j.nn.layers.normalization;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
@@ -23,6 +22,7 @@
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;
+import org.nd4j.linalg.primitives.Pair;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
@@ -120,7 +120,7 @@ public void testRegularization() {
// Confirm a configuration with L1/L2 regularization will not throw an error
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
- .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).regularization(true).l1(0.2)
+ .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).l1(0.2)
.l2(0.1).seed(123)
.layer(new LocalResponseNormalization.Builder().k(2).n(5).alpha(1e-4).beta(0.75).build())
.build();
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java
index ae6d7daac072..809865769c3b 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java
@@ -1,31 +1,17 @@
package org.deeplearning4j.nn.layers.objdetect;
-import org.deeplearning4j.nn.conf.ConvolutionMode;
+import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
-import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
-import org.deeplearning4j.nn.weights.WeightInit;
-import org.deeplearning4j.util.ModelSerializer;
import org.junit.Test;
-import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
-import org.nd4j.linalg.api.ops.executioner.OpExecutioner;
import org.nd4j.linalg.factory.Nd4j;
-import org.nd4j.linalg.ops.transforms.Transforms;
-import org.nd4j.linalg.util.ArrayUtil;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.util.Arrays;
import static org.junit.Assert.*;
-import static org.nd4j.linalg.indexing.NDArrayIndex.all;
-import static org.nd4j.linalg.indexing.NDArrayIndex.interval;
-import static org.nd4j.linalg.indexing.NDArrayIndex.point;
+import static org.nd4j.linalg.indexing.NDArrayIndex.*;
public class TestYolo2OutputLayer {
@@ -100,14 +86,7 @@ public void testYoloActivateScoreBasic() throws Exception {
//Finally: test ser/de:
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ModelSerializer.writeModel(net, baos, true);
- byte[] bytes = baos.toByteArray();
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- MultiLayerNetwork netLoaded = ModelSerializer.restoreMultiLayerNetwork(bais, true);
-
- assertEquals(net.params(), netLoaded.params());
- assertEquals(net.getLayerWiseConfigurations(), netLoaded.getLayerWiseConfigurations());
+ MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net);
y2impl = (org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer) netLoaded.getLayer(1);
y2impl.setInput(input);
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java
index b8f25abe3ecd..ac3a07070aed 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java
@@ -3,7 +3,6 @@
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
@@ -17,6 +16,7 @@
import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.Random;
@@ -41,8 +41,8 @@ public void testMaskingRnn() {
for (int miniBatchSize : minibatchSizes) {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false)
- .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
.build())
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java
index 922c2abea808..24c54bc10814 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java
@@ -1,14 +1,12 @@
package org.deeplearning4j.nn.layers.recurrent;
import junit.framework.TestCase;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.api.Model;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@@ -24,7 +22,10 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.AdaGrad;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -257,7 +258,7 @@ public void testSimpleForwardsAndBackwardsActivation() {
.layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
.nIn(nIn).nOut(layerSize).weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-0.1, 0.1))
- .activation(Activation.TANH).updater(Updater.NONE).build())
+ .activation(Activation.TANH).updater(new NoOp()).build())
.build();
final NeuralNetConfiguration confForwards = new NeuralNetConfiguration.Builder()
@@ -463,7 +464,8 @@ public void testConvergence() {
final MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(5)
- .learningRate(0.1).rmsDecay(0.95).regularization(true).l2(0.001).updater(Updater.ADAGRAD)
+ .updater(new AdaGrad(0.1))
+ .l2(0.001)
.seed(12345).list().pretrain(false)
.layer(0, new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
.activation(Activation.TANH).nIn(2).nOut(2).weightInit(WeightInit.DISTRIBUTION)
@@ -524,7 +526,8 @@ public void testSerialization() {
final MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(5)
- .learningRate(0.1).rmsDecay(0.95).regularization(true).l2(0.001).updater(Updater.ADAGRAD)
+ .updater(new AdaGrad(0.1))
+ .l2(0.001)
.seed(12345).list().pretrain(false)
.layer(0, new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
.activation(Activation.TANH).nIn(2).nOut(2).weightInit(WeightInit.DISTRIBUTION)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java
index c11e28390292..26820ae76e48 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java
@@ -1,10 +1,8 @@
package org.deeplearning4j.nn.layers.recurrent;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@@ -16,7 +14,9 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
+import org.nd4j.linalg.primitives.Pair;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
@@ -177,7 +177,7 @@ public void testSingleExample() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(0.1).seed(12345).list()
+ .updater(new Sgd(0.1)).seed(12345).list()
.layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().activation(Activation.TANH)
.nIn(2).nOut(2).build())
.layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder()
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestReconstructionDistributions.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestReconstructionDistributions.java
index 29fbdb77f54b..476f817b8150 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestReconstructionDistributions.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestReconstructionDistributions.java
@@ -10,6 +10,7 @@
import org.deeplearning4j.nn.conf.layers.variational.GaussianReconstructionDistribution;
import org.deeplearning4j.nn.conf.layers.variational.ReconstructionDistribution;
import org.junit.Test;
+import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
@@ -46,7 +47,7 @@ public void testGaussianLogProb() {
distributionParams.get(NDArrayIndex.all(), NDArrayIndex.interval(inputSize, 2 * inputSize))
.assign(logStdevSquared);
- ReconstructionDistribution dist = new GaussianReconstructionDistribution("identity");
+ ReconstructionDistribution dist = new GaussianReconstructionDistribution(Activation.IDENTITY);
double negLogProb = dist.negLogProbability(x, distributionParams, average);
@@ -115,7 +116,7 @@ public void testBernoulliLogProb() {
INDArray distributionParams = Nd4j.rand(minibatch, inputSize).muli(2).subi(1); //i.e., pre-sigmoid prob
INDArray prob = Transforms.sigmoid(distributionParams, true);
- ReconstructionDistribution dist = new BernoulliReconstructionDistribution("sigmoid");
+ ReconstructionDistribution dist = new BernoulliReconstructionDistribution(Activation.SIGMOID);
double negLogProb = dist.negLogProbability(x, distributionParams, average);
@@ -192,7 +193,7 @@ public void testExponentialLogProb() {
INDArray distributionParams = Nd4j.rand(minibatch, inputSize).muli(2).subi(1); //i.e., pre-afn gamma
INDArray gammas = Transforms.tanh(distributionParams, true);
- ReconstructionDistribution dist = new ExponentialReconstructionDistribution("tanh");
+ ReconstructionDistribution dist = new ExponentialReconstructionDistribution(Activation.TANH);
double negLogProb = dist.negLogProbability(x, distributionParams, average);
@@ -263,11 +264,11 @@ public void gradientCheckReconstructionDistributions() {
Random r = new Random(12345);
ReconstructionDistribution[] distributions =
- new ReconstructionDistribution[] {new GaussianReconstructionDistribution("identity"),
- new GaussianReconstructionDistribution("tanh"),
- new BernoulliReconstructionDistribution("sigmoid"),
- new ExponentialReconstructionDistribution("identity"),
- new ExponentialReconstructionDistribution("tanh")};
+ new ReconstructionDistribution[] {new GaussianReconstructionDistribution(Activation.IDENTITY),
+ new GaussianReconstructionDistribution(Activation.TANH),
+ new BernoulliReconstructionDistribution(Activation.SIGMOID),
+ new ExponentialReconstructionDistribution(Activation.IDENTITY),
+ new ExponentialReconstructionDistribution(Activation.TANH)};
List<String> passes = new ArrayList<>();
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java
index d500a3dcf80f..b3ce278b9147 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java
@@ -1,9 +1,7 @@
package org.deeplearning4j.nn.layers.variational;
-import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.variational.*;
@@ -18,6 +16,7 @@
import org.nd4j.linalg.api.ops.random.impl.BernoulliDistribution;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.lossfunctions.impl.LossMAE;
import org.nd4j.linalg.lossfunctions.impl.LossMSE;
@@ -253,26 +252,25 @@ public void testJsonYaml() {
MultiLayerConfiguration config = new NeuralNetConfiguration.Builder().seed(12345).list()
.layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
- .reconstructionDistribution(new GaussianReconstructionDistribution("identity"))
+ .reconstructionDistribution(new GaussianReconstructionDistribution(Activation.IDENTITY))
.nIn(3).nOut(4).encoderLayerSizes(5).decoderLayerSizes(6).build())
.layer(1, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
- .reconstructionDistribution(new GaussianReconstructionDistribution("tanh"))
+ .reconstructionDistribution(new GaussianReconstructionDistribution(Activation.TANH))
.nIn(7).nOut(8).encoderLayerSizes(9).decoderLayerSizes(10).build())
.layer(2, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
.reconstructionDistribution(new BernoulliReconstructionDistribution()).nIn(11)
.nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build())
.layer(3, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
- .reconstructionDistribution(new ExponentialReconstructionDistribution("tanh"))
+ .reconstructionDistribution(new ExponentialReconstructionDistribution(Activation.TANH))
.nIn(11).nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build())
.layer(4, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
- //.lossFunction("tanh", LossFunctions.LossFunction.MSE)
.lossFunction(new ActivationTanH(), LossFunctions.LossFunction.MSE).nIn(11)
.nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build())
.layer(5, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
.reconstructionDistribution(new CompositeReconstructionDistribution.Builder()
.addDistribution(5, new GaussianReconstructionDistribution())
.addDistribution(5,
- new GaussianReconstructionDistribution("tanh"))
+ new GaussianReconstructionDistribution(Activation.TANH))
.addDistribution(5, new BernoulliReconstructionDistribution())
.build())
.nIn(15).nOut(16).encoderLayerSizes(17).decoderLayerSizes(18).build())
@@ -334,8 +332,8 @@ public void testReconstructionDistributionsSimple() {
throw new RuntimeException();
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.3)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3)
+ .updater(new Sgd(1.0))
.seed(12345L).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
.list().layer(0,
new VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3)
@@ -343,7 +341,7 @@ public void testReconstructionDistributionsSimple() {
.pzxActivationFunction(Activation.TANH)
.reconstructionDistribution(
reconstructionDistributions[i])
- .activation(new ActivationTanH()).updater(Updater.SGD)
+ .activation(new ActivationTanH())
.build())
.pretrain(true).backprop(false).build();
@@ -400,8 +398,8 @@ public void testReconstructionErrorSimple() {
for (int i = 0; i < reconstructionDistributions.length; i++) {
INDArray data = Nd4j.rand(minibatch, inOutSize).muli(2).subi(1);
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.3)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3)
+ .updater(new Sgd(1.0))
.seed(12345L).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
.list().layer(0,
new VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3)
@@ -409,7 +407,7 @@ public void testReconstructionErrorSimple() {
.pzxActivationFunction(Activation.TANH)
.reconstructionDistribution(
reconstructionDistributions[i])
- .activation(new ActivationTanH()).updater(Updater.SGD)
+ .activation(new ActivationTanH())
.build())
.pretrain(true).backprop(false).build();
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java
index 635a70e94caa..bb6fdcc5f700 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java
@@ -1,6 +1,5 @@
package org.deeplearning4j.nn.misc;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
@@ -19,6 +18,7 @@
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.List;
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java
index cf3c664753af..f7e9860176cb 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java
@@ -2,10 +2,8 @@
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.nn.api.Layer;
-import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -19,6 +17,7 @@
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
import java.util.Arrays;
@@ -299,19 +298,18 @@ private static void testIrisMiniBatchGradients(int miniBatchSize, int[] hiddenLa
*/
private static MultiLayerConfiguration getIrisMLPSimpleConfig(int[] hiddenLayerSizes,
Activation activationFunction) {
- NeuralNetConfiguration.ListBuilder lb = new NeuralNetConfiguration.Builder().iterations(1).learningRate(0.1)
- .updater(Updater.SGD).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .regularization(false).seed(12345L).list();
+ NeuralNetConfiguration.ListBuilder lb = new NeuralNetConfiguration.Builder().iterations(1).updater(new Sgd(0.1))
+ .seed(12345L).list();
for (int i = 0; i < hiddenLayerSizes.length; i++) {
int nIn = (i == 0 ? 4 : hiddenLayerSizes[i - 1]);
lb.layer(i, new DenseLayer.Builder().nIn(nIn).nOut(hiddenLayerSizes[i]).weightInit(WeightInit.XAVIER)
- .updater(Updater.SGD).activation(activationFunction).build());
+ .activation(activationFunction).build());
}
lb.layer(hiddenLayerSizes.length,
new OutputLayer.Builder(LossFunction.MCXENT).nIn(hiddenLayerSizes[hiddenLayerSizes.length - 1])
- .nOut(3).weightInit(WeightInit.XAVIER).updater(Updater.SGD)
+ .nOut(3).weightInit(WeightInit.XAVIER)
.activation(activationFunction.equals(Activation.IDENTITY) ? Activation.IDENTITY
: Activation.SOFTMAX)
.build());
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/GravesLSTMOutputTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/GravesLSTMOutputTest.java
index b1fca78bffc0..8ecb6a95aa20 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/GravesLSTMOutputTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/GravesLSTMOutputTest.java
@@ -1,11 +1,9 @@
package org.deeplearning4j.nn.multilayer;
import org.deeplearning4j.eval.Evaluation;
-import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.BackpropType;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
@@ -23,6 +21,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.learning.config.AdaGrad;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.util.FeatureUtil;
import org.slf4j.Logger;
@@ -90,18 +89,14 @@ private Evaluation eval(MultiLayerNetwork network) {
private MultiLayerConfiguration getNetworkConf(int iterations, boolean useTBPTT) {
MultiLayerConfiguration.Builder builder =
new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .learningRate(0.1).regularization(true).l2(0.0025)
- .iterations(iterations).stepFunction(
- new NegativeDefaultStepFunction())
+ .updater(new AdaGrad(0.1)).l2(0.0025)
+ .iterations(iterations).stepFunction(new NegativeDefaultStepFunction())
.list()
.layer(0, new GravesLSTM.Builder().weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0.0, 0.01)).nIn(nIn)
- .nOut(layerSize).updater(Updater.ADAGRAD)
- .activation(Activation.TANH).build())
+ .nOut(layerSize).activation(Activation.TANH).build())
.layer(1, new OutputLayer.Builder(
- LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
- .updater(Updater.ADAGRAD).nIn(layerSize)
+ LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nIn(layerSize)
.nOut(nIn).activation(Activation.SOFTMAX)
.build())
.inputPreProcessor(1, new RnnToFeedForwardPreProcessor()).backprop(true)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java
index 8945fc80f632..52ee20eac276 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java
@@ -18,7 +18,7 @@
package org.deeplearning4j.nn.multilayer;
-import org.nd4j.linalg.primitives.Pair;
+import org.deeplearning4j.TestUtils;
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.eval.Evaluation;
@@ -57,14 +57,16 @@
import org.nd4j.linalg.heartbeat.reports.Task;
import org.nd4j.linalg.heartbeat.utils.EnvironmentUtils;
import org.nd4j.linalg.heartbeat.utils.TaskUtils;
+import org.nd4j.linalg.learning.config.NoOp;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
-import java.io.ObjectOutputStream;
import java.util.*;
import static org.junit.Assert.*;
@@ -177,8 +179,8 @@ public void testDbn() throws Exception {
Nd4j.MAX_SLICES_TO_PRINT = -1;
Nd4j.MAX_ELEMENTS_PER_SLICE = -1;
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().iterations(100).momentum(0.9)
- .optimizationAlgo(OptimizationAlgorithm.LBFGS).regularization(true).l2(2e-4)
+ new NeuralNetConfiguration.Builder().iterations(100)
+ .optimizationAlgo(OptimizationAlgorithm.LBFGS).l2(2e-4)
.list().layer(0,
new RBM.Builder(RBM.HiddenUnit.GAUSSIAN,
RBM.VisibleUnit.GAUSSIAN).nIn(4).nOut(3)
@@ -363,7 +365,7 @@ public void testFeedForwardToLayer() {
MultiLayerConfiguration conf =
new NeuralNetConfiguration.Builder().optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
- .iterations(5).learningRate(1e-3)
+ .updater(new Sgd(1e-3)).iterations(5)
.list().layer(
0, new RBM.Builder(RBM.HiddenUnit.RECTIFIED,
RBM.VisibleUnit.GAUSSIAN).nIn(nIn)
@@ -423,7 +425,7 @@ public void testBackpropGradient() {
int miniBatch = 5;
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).learningRate(0.1).list()
+ .updater(new Sgd(0.1)).list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).activation(Activation.RELU)
.weightInit(WeightInit.XAVIER).build())
.layer(1, new DenseLayer.Builder().nIn(20).nOut(30).activation(Activation.RELU)
@@ -485,7 +487,7 @@ public void testLayerNames() {
layerNameList.add("dnn3");
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).learningRate(0.1).list()
+ .updater(new Sgd(0.1)).list()
.layer(0, new DenseLayer.Builder().name("dnn1").nIn(nIn).nOut(20).activation(Activation.RELU)
.weightInit(WeightInit.XAVIER).build())
.layer(1, new DenseLayer.Builder().name("dnn2").nIn(20).nOut(30).activation(Activation.RELU)
@@ -505,7 +507,7 @@ public void testLayerNames() {
@Test
public void testTranspose() {
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().iterations(100).momentum(0.9).regularization(true).l2(2e-4)
+ new NeuralNetConfiguration.Builder().iterations(100).l2(2e-4)
.list().layer(0,
new RBM.Builder(RBM.HiddenUnit.GAUSSIAN,
RBM.VisibleUnit.GAUSSIAN).nIn(4).nOut(3)
@@ -557,15 +559,15 @@ public void testScoreExamples() {
Nd4j.getRandom().setSeed(12345);
int nIn = 5;
int nOut = 6;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l1(0.01)
- .l2(0.01).learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01)
+ .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build())
.layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder()
.lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build())
.build();
- MultiLayerConfiguration confNoReg = new NeuralNetConfiguration.Builder().seed(12345).regularization(false)
- .learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list()
+ MultiLayerConfiguration confNoReg = new NeuralNetConfiguration.Builder().seed(12345)
+ .updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build())
.layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder()
.lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build())
@@ -609,7 +611,7 @@ public void testScoreExamples() {
public void testDataSetScore() {
Nd4j.getRandom().setSeed(12345);
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.weightInit(WeightInit.XAVIER).seed(12345L).list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.SIGMOID).build())
.layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
@@ -634,7 +636,7 @@ public void testDataSetScoreCNN() {
int height = 3;
int nOut = 2;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(12345L).list().layer(0, new ConvolutionLayer.Builder(2, 2).nOut(1).build())
.layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(2).build())
@@ -659,7 +661,7 @@ public void testDataSetScoreCNN() {
public void testPredict() throws Exception {
Nd4j.getRandom().setSeed(12345);
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.weightInit(WeightInit.XAVIER).seed(12345L).list()
.layer(0, new DenseLayer.Builder().nIn(784).nOut(50).activation(Activation.RELU).build())
.layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
@@ -699,7 +701,7 @@ public void testCid() throws Exception {
@Test
public void testOutput() throws Exception {
Nd4j.getRandom().setSeed(12345);
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.weightInit(WeightInit.XAVIER).seed(12345L).list()
.layer(0, new DenseLayer.Builder().nIn(784).nOut(50).activation(Activation.RELU).build())
.layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
@@ -735,8 +737,8 @@ public void testGradientUpdate() throws Exception {
expectedGradient.setGradientFor("1_b", Nd4j.ones(1, 3));
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().updater(org.deeplearning4j.nn.conf.Updater.SGD)
- .learningRate(1).activation(Activation.RELU).weightInit(WeightInit.XAVIER)
+ new NeuralNetConfiguration.Builder().updater(new Sgd(1.0))
+ .activation(Activation.RELU).weightInit(WeightInit.XAVIER)
.list().layer(0, new DenseLayer.Builder().name("dnn1").nIn(4).nOut(5).build())
.layer(1, new OutputLayer.Builder().name("output").nIn(5).nOut(3)
.activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER)
@@ -843,8 +845,7 @@ public void testLayerPreTrainSetFalseAfterPreTrain() {
public MultiLayerNetwork getRBMModel(boolean preTrain, int nIn, int nOut) {
MultiLayerConfiguration rbm = new NeuralNetConfiguration.Builder()
- .seed(42).iterations(1).updater(Updater.NONE).epsilon(
- 1)
+ .seed(42).iterations(1).updater(new NoOp())
.weightInit(WeightInit.UNIFORM)
.list(new org.deeplearning4j.nn.conf.layers.RBM.Builder()
.lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY)
@@ -914,7 +915,7 @@ public void testBiasL1L2() {
.backprop(true).pretrain(false).build();
MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).regularization(true)
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.l1Bias(0.1).l2Bias(0.2).iterations(1).weightInit(WeightInit.XAVIER).activation(Activation.TANH)
.seed(123).list().layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
.layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
@@ -1000,9 +1001,9 @@ public void testSummary() {
int V_HEIGHT = 130;
int V_NFRAMES = 150;
MultiLayerConfiguration confForArchitecture =
- new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l2(0.001) //l2 regularization on all layers
+ new NeuralNetConfiguration.Builder().seed(12345).l2(0.001) //l2 regularization on all layers
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .iterations(1).learningRate(0.4).list()
+ .iterations(1).list()
.layer(0, new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB
.nOut(30).stride(4, 4).activation(Activation.RELU).weightInit(
WeightInit.RELU)
@@ -1016,12 +1017,12 @@ public void testSummary() {
.weightInit(WeightInit.RELU).updater(Updater.ADAGRAD)
.gradientNormalization(
GradientNormalization.ClipElementWiseAbsoluteValue)
- .gradientNormalizationThreshold(10).learningRate(0.5).build())
+ .gradientNormalizationThreshold(10).build())
.layer(4, new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50)
.nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD)
.gradientNormalization(
GradientNormalization.ClipElementWiseAbsoluteValue)
- .gradientNormalizationThreshold(10).learningRate(0.6)
+ .gradientNormalizationThreshold(10)
.build())
.layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(50).nOut(4) //4 possible shapes: circle, square, arc, line
@@ -1182,14 +1183,7 @@ public void testEpochCounter() throws Exception {
assertEquals(4, net.getLayerWiseConfigurations().getEpochCount());
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
-
- ModelSerializer.writeModel(net, baos, true);
- byte[] bytes = baos.toByteArray();
-
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
-
- MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
+ MultiLayerNetwork restored = TestUtils.testModelSerialization(net);
assertEquals(4, restored.getLayerWiseConfigurations().getEpochCount());
}
}
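
The hunks above also fold the hand-rolled ByteArrayOutputStream/ModelSerializer round trip into a single TestUtils.testModelSerialization(net) call. The helper's body is not shown in this diff; a minimal sketch consistent with the code it replaces (the class and method names below are assumptions, not the actual helper) could look like:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.deeplearning4j.util.ModelSerializer;

    import static org.junit.Assert.assertEquals;

    // Hypothetical round-trip helper in the spirit of TestUtils.testModelSerialization(...)
    public class SerializationRoundTrip {
        public static MultiLayerNetwork roundTrip(MultiLayerNetwork net) throws IOException {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            ModelSerializer.writeModel(net, baos, true);   // true: persist updater state as well
            ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
            MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
            assertEquals(net.params(), restored.params()); // parameters must survive the round trip
            return restored;
        }
    }

Centralizing the round trip keeps each test to a single line and ensures every serialization test exercises the same code path.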
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java
index b29b2ef75547..88d474243da3 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java
@@ -1,6 +1,5 @@
package org.deeplearning4j.nn.multilayer;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.BackpropType;
@@ -26,6 +25,7 @@
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
+import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.List;
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java
index e77817e9fb5e..f2527df6a420 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java
@@ -4,7 +4,10 @@
import org.deeplearning4j.eval.EvaluationBinary;
import org.deeplearning4j.gradientcheck.LossFunctionGradientCheck;
import org.deeplearning4j.nn.api.Layer;
-import org.deeplearning4j.nn.conf.*;
+import org.deeplearning4j.nn.conf.BackpropType;
+import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
+import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
@@ -20,6 +23,7 @@
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.ILossFunction;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.lossfunctions.impl.*;
@@ -124,7 +128,7 @@ public void testPerOutputMaskingMLN() {
Activation a = act[i];
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp())
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345)
.list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
@@ -169,7 +173,7 @@ public void testPerOutputMaskingMLN() {
//Do the same for CompGraph
- ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().updater(Updater.NONE)
+ ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().updater(new NoOp())
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345)
.graphBuilder().addInputs("in")
.addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(layerSize)
@@ -209,7 +213,7 @@ public void testCompGraphEvalWithMask() {
int nIn = 5;
int nOut = 4;
- ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().updater(Updater.NONE)
+ ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().updater(new NoOp())
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345)
.graphBuilder().addInputs("in")
.addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java
index 39d6cd60ee2d..b32ca5f88009 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java
@@ -3,7 +3,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor;
@@ -17,6 +16,8 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.learning.config.NoOp;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.Arrays;
@@ -45,7 +46,7 @@ public void testVariableLengthSimple() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(0.1).seed(12345).list()
+ .updater(new Sgd(0.1)).seed(12345).list()
.layer(0, new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
.layer(1, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2)
.nOut(1).build())
@@ -133,7 +134,7 @@ public void testInputMasking() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(0.1).seed(12345).list()
+ .updater(new Sgd(0.1)).seed(12345).list()
.layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
.layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
.layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
@@ -270,17 +271,17 @@ public void testOutputMaskingScoreMagnitudes() {
INDArray labels = Nd4j.ones(miniBatch, nOut, tsLength);
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().regularization(false).seed(12345L).list()
+ new NeuralNetConfiguration.Builder().seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5)
.weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.layer(1, new RnnOutputLayer.Builder(
LossFunctions.LossFunction.MSE)
.activation(Activation.IDENTITY)
.nIn(5).nOut(nOut)
.weightInit(WeightInit.ZERO)
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
@@ -333,33 +334,33 @@ public void testOutputMasking() {
INDArray input = Nd4j.rand(new int[] {miniBatch, nIn, tsLength});
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().regularization(false).seed(12345L).list()
+ new NeuralNetConfiguration.Builder().seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5)
.weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.layer(1, new RnnOutputLayer.Builder(
LossFunctions.LossFunction.MSE)
.activation(Activation.IDENTITY)
.nIn(5).nOut(nOut)
.weightInit(WeightInit.XAVIER)
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
MultiLayerConfiguration conf2 =
- new NeuralNetConfiguration.Builder().regularization(false).seed(12345L).list()
+ new NeuralNetConfiguration.Builder().seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5)
.weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 1))
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.layer(1, new RnnOutputLayer.Builder(
LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nIn(5).nOut(nOut)
.weightInit(WeightInit.XAVIER)
- .updater(Updater.NONE).build())
+ .updater(new NoOp()).build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork mln2 = new MultiLayerNetwork(conf2);
mln2.init();
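
This file shows both levels at which the IUpdater instances plug in: a builder-wide default via .updater(new Sgd(0.1)) and per-layer overrides via .updater(new NoOp()) on individual layer builders. A compact sketch of the same pattern, with placeholder sizes:

    import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.nd4j.linalg.learning.config.NoOp;
    import org.nd4j.linalg.learning.config.Sgd;
    import org.nd4j.linalg.lossfunctions.LossFunctions;

    public class PerLayerUpdaterExample {
        public static MultiLayerConfiguration build() {
            return new NeuralNetConfiguration.Builder()
                    .updater(new Sgd(0.1))          // default updater for every layer
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(5).nOut(5).build()) // inherits Sgd(0.1)
                    .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE)
                            .nIn(5).nOut(3)
                            .updater(new NoOp())    // override: this layer's gradient is applied as-is
                            .build())
                    .build();
        }
    }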
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java
index a9f7a1e9a42f..9a411b799e61 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java
@@ -3,7 +3,6 @@
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -14,6 +13,10 @@
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Adam;
+import org.nd4j.linalg.learning.config.IUpdater;
+import org.nd4j.linalg.learning.config.Nesterovs;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import static org.junit.Assert.assertEquals;
@@ -34,13 +37,12 @@ public void testGradientApplyMultiLayerNetwork() {
int nOut = 10;
for (boolean regularization : new boolean[] {false, true}) {
- for (Updater u : new Updater[] {Updater.SGD, Updater.NESTEROVS, Updater.ADAM}) {
- // for (Updater u : new Updater[]{Updater.ADAM}) {
+ for (IUpdater u : new IUpdater[] {new Sgd(0.1), new Nesterovs(0.1), new Adam(0.1)}) {
MultiLayerConfiguration conf =
new NeuralNetConfiguration.Builder().seed(12345).activation(Activation.TANH)
- .weightInit(WeightInit.XAVIER).updater(u).learningRate(0.1)
- .regularization(regularization).l1(regularization ? 0.2 : 0.0)
+ .weightInit(WeightInit.XAVIER).updater(u)
+ .l1(regularization ? 0.2 : 0.0)
.l2(regularization ? 0.3 : 0.0).list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(10).build())
.layer(1, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(2,
@@ -82,7 +84,7 @@ public void testGradientApplyMultiLayerNetwork() {
Gradient g = net1GradCalc.gradient();
INDArray gBefore = g.gradient().dup(); //Net 1 gradient should be modified
INDArray net2GradBefore = net2GradUpd.gradient().gradient().dup(); //But net 2 gradient should not be
- net2GradUpd.getUpdater().update(net2GradUpd, g, 0, minibatch);
+ net2GradUpd.getUpdater().update(net2GradUpd, g, 0, 0, minibatch);
INDArray gAfter = g.gradient().dup();
INDArray net2GradAfter = net2GradUpd.gradient().gradient().dup();
@@ -99,7 +101,7 @@ public void testGradientApplyMultiLayerNetwork() {
//=============================
- if (u != Updater.SGD) {
+ if (!(u instanceof Sgd)) {
net2GradUpd.getUpdater().getStateViewArray().assign(net1GradCalc.getUpdater().getStateViewArray());
}
assertEquals(net1GradCalc.params(), net2GradUpd.params());
@@ -127,12 +129,12 @@ public void testGradientApplyComputationGraph() {
int nOut = 10;
for (boolean regularization : new boolean[] {false, true}) {
- for (Updater u : new Updater[] {Updater.SGD, Updater.ADAM}) {
+ for (IUpdater u : new IUpdater[] {new Sgd(0.1), new Adam(0.1)}) {
ComputationGraphConfiguration conf =
new NeuralNetConfiguration.Builder().seed(12345).activation(Activation.TANH)
- .weightInit(WeightInit.XAVIER).updater(u).learningRate(0.1)
- .regularization(regularization).l1(regularization ? 0.2 : 0.0)
+ .weightInit(WeightInit.XAVIER).updater(u)
+ .l1(regularization ? 0.2 : 0.0)
.l2(regularization ? 0.3 : 0.0).graphBuilder().addInputs("in")
.addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(10).build(), "in")
.addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).build(), "0")
@@ -174,7 +176,7 @@ public void testGradientApplyComputationGraph() {
Gradient g = net1GradCalc.gradient();
INDArray gBefore = g.gradient().dup(); //Net 1 gradient should be modified
INDArray net2GradBefore = net2GradUpd.gradient().gradient().dup(); //But net 2 gradient should not be
- net2GradUpd.getUpdater().update(g, 0, minibatch);
+ net2GradUpd.getUpdater().update(g, 0, 0, minibatch);
INDArray gAfter = g.gradient().dup();
INDArray net2GradAfter = net2GradUpd.gradient().gradient().dup();
@@ -190,7 +192,7 @@ public void testGradientApplyComputationGraph() {
assertEquals(net1GradCalc.params(), net2GradUpd.params());
//=============================
- if (u != Updater.SGD) {
+ if (!(u instanceof Sgd)) {
net2GradUpd.getUpdater().getStateViewArray().assign(net1GradCalc.getUpdater().getStateViewArray());
}
assertEquals(net1GradCalc.params(), net2GradUpd.params());
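
Two API shifts land in this file: loops over the Updater enum become loops over configured IUpdater instances, so identity checks like u != Updater.SGD become !(u instanceof Sgd), and the updater's update(...) call gains an epoch argument (passed as 0 here). A small runnable illustration of the instanceof idiom:

    import org.nd4j.linalg.learning.config.Adam;
    import org.nd4j.linalg.learning.config.IUpdater;
    import org.nd4j.linalg.learning.config.Nesterovs;
    import org.nd4j.linalg.learning.config.Sgd;

    public class UpdaterInstanceChecks {
        public static void main(String[] args) {
            for (IUpdater u : new IUpdater[] {new Sgd(0.1), new Nesterovs(0.1), new Adam(0.1)}) {
                // Sgd keeps no state array; Nesterovs (momentum) and Adam (moment estimates) do,
                // which is why the test only copies the state view for non-Sgd updaters.
                boolean stateful = !(u instanceof Sgd);
                System.out.println(u.getClass().getSimpleName() + " stateful=" + stateful);
            }
        }
    }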
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningJson.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningJson.java
index a555336d2a6c..82be21432534 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningJson.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningJson.java
@@ -1,8 +1,8 @@
package org.deeplearning4j.nn.transferlearning;
-import org.deeplearning4j.nn.conf.Updater;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
+import org.nd4j.linalg.learning.config.AdaGrad;
import static org.junit.Assert.assertEquals;
@@ -15,7 +15,7 @@ public class TestTransferLearningJson {
public void testJsonYaml() {
FineTuneConfiguration c = new FineTuneConfiguration.Builder().activation(Activation.ELU).backprop(true)
- .updater(Updater.ADAGRAD).biasLearningRate(10.0).build();
+ .updater(new AdaGrad(1.0)).biasUpdater(new AdaGrad(10.0)).build();
String asJson = c.toJson();
String asYaml = c.toYaml();
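
The JSON test now configures weight and bias updaters independently: the old biasLearningRate(10.0) shorthand becomes an explicit biasUpdater(new AdaGrad(10.0)) next to the weight updater. In isolation, the pattern looks like this (the rates mirror the test):

    import org.deeplearning4j.nn.transferlearning.FineTuneConfiguration;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.learning.config.AdaGrad;

    public class BiasUpdaterExample {
        public static void main(String[] args) {
            FineTuneConfiguration c = new FineTuneConfiguration.Builder()
                    .activation(Activation.ELU)
                    .updater(new AdaGrad(1.0))       // applied to weight parameters
                    .biasUpdater(new AdaGrad(10.0))  // applied to bias parameters
                    .build();

            // Both serialized forms now carry full updater objects, not just an enum name:
            System.out.println(c.toJson());
            System.out.println(c.toYaml());
        }
    }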
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java
index d1179b90bdf6..137ab9bf8e68 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java
@@ -1,9 +1,9 @@
package org.deeplearning4j.nn.transferlearning;
+import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.graph.GraphVertex;
import org.deeplearning4j.nn.conf.graph.LayerVertex;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
@@ -11,15 +11,13 @@
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.layers.FrozenLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
-import org.deeplearning4j.util.ModelSerializer;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
import java.util.Map;
import static org.junit.Assert.*;
@@ -32,13 +30,13 @@ public class TestTransferLearningModelSerializer {
@Test
public void testModelSerializerFrozenLayers() throws Exception {
- FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().learningRate(0.1).build();
+ FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build();
int nIn = 6;
int nOut = 3;
- MultiLayerConfiguration origConf = new NeuralNetConfiguration.Builder().learningRate(0.1).updater(Updater.SGD)
- .activation(Activation.TANH).regularization(true).dropOut(0.5).list()
+ MultiLayerConfiguration origConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
+ .activation(Activation.TANH).dropOut(0.5).list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5).build())
.layer(1, new DenseLayer.Builder().nIn(5).nOut(4).build())
.layer(2, new DenseLayer.Builder().nIn(4).nOut(3).build())
@@ -60,15 +58,7 @@ public void testModelSerializerFrozenLayers() throws Exception {
assertTrue(withFrozen.getLayerWiseConfigurations().getConf(1)
.getLayer() instanceof org.deeplearning4j.nn.conf.layers.misc.FrozenLayer);
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ModelSerializer.writeModel(withFrozen, baos, false);
- baos.close();
-
- byte[] asBytes = baos.toByteArray();
-
- ByteArrayInputStream bais = new ByteArrayInputStream(asBytes);
- MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais);
+ MultiLayerNetwork restored = TestUtils.testModelSerialization(withFrozen);
assertTrue(restored.getLayer(0) instanceof FrozenLayer);
assertTrue(restored.getLayer(1) instanceof FrozenLayer);
@@ -89,13 +79,12 @@ public void testModelSerializerFrozenLayers() throws Exception {
@Test
public void testModelSerializerFrozenLayersCompGraph() throws Exception {
- FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().learningRate(0.1).build();
+ FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build();
int nIn = 6;
int nOut = 3;
- ComputationGraphConfiguration origConf = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .updater(Updater.SGD).activation(Activation.TANH).graphBuilder().addInputs("in")
+ ComputationGraphConfiguration origConf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).graphBuilder().addInputs("in")
.addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(5).build(), "in")
.addLayer("1", new DenseLayer.Builder().nIn(5).nOut(4).build(), "0")
.addLayer("2", new DenseLayer.Builder().nIn(4).nOut(3).build(), "1")
@@ -119,15 +108,7 @@ public void testModelSerializerFrozenLayersCompGraph() throws Exception {
assertTrue(l0 instanceof org.deeplearning4j.nn.conf.layers.misc.FrozenLayer);
assertTrue(l1 instanceof org.deeplearning4j.nn.conf.layers.misc.FrozenLayer);
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ModelSerializer.writeModel(withFrozen, baos, false);
- baos.close();
-
- byte[] asBytes = baos.toByteArray();
-
- ByteArrayInputStream bais = new ByteArrayInputStream(asBytes);
- ComputationGraph restored = ModelSerializer.restoreComputationGraph(bais);
+ ComputationGraph restored = TestUtils.testModelSerialization(withFrozen);
assertTrue(restored.getLayer(0) instanceof FrozenLayer);
assertTrue(restored.getLayer(1) instanceof FrozenLayer);
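
Besides the Sgd migration, this file also adopts the shared serialization helper. The frozen-layer setup it exercises can be sketched as a self-contained example; the layer sizes are placeholders, and setFeatureExtractor(1) is the call that freezes layers 0 and 1:

    import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.deeplearning4j.nn.layers.FrozenLayer;
    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.deeplearning4j.nn.transferlearning.FineTuneConfiguration;
    import org.deeplearning4j.nn.transferlearning.TransferLearning;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.learning.config.Sgd;
    import org.nd4j.linalg.lossfunctions.LossFunctions;

    public class FrozenLayerExample {
        public static void main(String[] args) {
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(new Sgd(0.1)).activation(Activation.TANH).list()
                    .layer(0, new DenseLayer.Builder().nIn(6).nOut(5).build())
                    .layer(1, new DenseLayer.Builder().nIn(5).nOut(4).build())
                    .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                            .activation(Activation.SOFTMAX).nIn(4).nOut(3).build())
                    .build();
            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            MultiLayerNetwork withFrozen = new TransferLearning.Builder(net)
                    .fineTuneConfiguration(new FineTuneConfiguration.Builder()
                            .updater(new Sgd(0.1)).build())
                    .setFeatureExtractor(1)   // wrap layers 0..1 in FrozenLayer
                    .build();

            // Frozen layers keep their weights fixed during further fitting:
            System.out.println(withFrozen.getLayer(0) instanceof FrozenLayer); // true
            System.out.println(withFrozen.getLayer(1) instanceof FrozenLayer); // true
        }
    }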
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java
index 1e613f74c7f1..ba941eadc603 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java
@@ -3,7 +3,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
@@ -14,8 +13,14 @@
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Adam;
+import org.nd4j.linalg.learning.config.Nesterovs;
+import org.nd4j.linalg.learning.config.RmsProp;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
+import java.util.Collections;
+
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
@@ -31,8 +36,8 @@ public void simpleFineTune() {
DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3));
//original conf
ComputationGraphConfiguration confToChange = new NeuralNetConfiguration.Builder().seed(rng)
- .optimizationAlgo(OptimizationAlgorithm.LBFGS).updater(Updater.NESTEROVS).momentum(0.99)
- .learningRate(0.01).graphBuilder().addInputs("layer0In").setInputTypes(InputType.feedForward(4))
+ .optimizationAlgo(OptimizationAlgorithm.LBFGS).updater(new Nesterovs(0.01, 0.99))
+ .graphBuilder().addInputs("layer0In").setInputTypes(InputType.feedForward(4))
.addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "layer0In")
.addLayer("layer1",
new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
@@ -44,8 +49,8 @@ public void simpleFineTune() {
//conf with learning parameters changed
ComputationGraphConfiguration expectedConf = new NeuralNetConfiguration.Builder().seed(rng)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.RMSPROP)
- .learningRate(0.2).regularization(true).graphBuilder().addInputs("layer0In")
+ .updater(new RmsProp(0.2))
+ .graphBuilder().addInputs("layer0In")
.setInputTypes(InputType.feedForward(4))
.addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "layer0In")
.addLayer("layer1",
@@ -65,10 +70,7 @@ public void simpleFineTune() {
ComputationGraph modelNow =
new TransferLearning.GraphBuilder(modelToFineTune)
.fineTuneConfiguration(new FineTuneConfiguration.Builder().seed(rng)
- .optimizationAlgo(
- OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .updater(Updater.RMSPROP).learningRate(0.2).regularization(true)
- .build())
+ .updater(new RmsProp(0.2)).build())
.build();
//Check json
@@ -85,11 +87,9 @@ public void simpleFineTune() {
public void testNoutChanges() {
DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 2));
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
.activation(Activation.IDENTITY);
- FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().learningRate(0.1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
+ FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().updater(new Sgd(0.1))
.activation(Activation.IDENTITY).build();
ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In")
@@ -156,11 +156,9 @@ public void testNoutChanges() {
public void testRemoveAndAdd() {
DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3));
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
.activation(Activation.IDENTITY);
- FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().learningRate(0.1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
+ FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().updater(new Sgd(0.1))
.activation(Activation.IDENTITY).build();
ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In")
@@ -226,139 +224,134 @@ public void testAllWithCNN() {
DataSet randomData = new DataSet(Nd4j.rand(10, 28 * 28 * 3).reshape(10, 3, 28, 28), Nd4j.rand(10, 10));
ComputationGraph modelToFineTune =
new ComputationGraph(
- new NeuralNetConfiguration.Builder().seed(123).iterations(1).learningRate(.01)
- .weightInit(WeightInit.XAVIER)
- .optimizationAlgo(
- OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .updater(Updater.NESTEROVS).momentum(0.9).graphBuilder()
- .addInputs("layer0In")
- .setInputTypes(InputType.convolutionalFlat(28, 28,
- 3))
- .addLayer("layer0",
- new ConvolutionLayer.Builder(5, 5).nIn(3)
- .stride(1, 1).nOut(20)
- .activation(Activation.IDENTITY)
- .build(),
- "layer0In")
- .addLayer("layer1",
- new SubsamplingLayer.Builder(
- SubsamplingLayer.PoolingType.MAX)
- .kernelSize(2, 2)
- .stride(2, 2)
- .build(),
- "layer0")
- .addLayer("layer2",
- new ConvolutionLayer.Builder(5, 5).stride(1, 1)
- .nOut(50)
- .activation(Activation.IDENTITY)
- .build(),
- "layer1")
- .addLayer("layer3",
- new SubsamplingLayer.Builder(
- SubsamplingLayer.PoolingType.MAX)
- .kernelSize(2, 2)
- .stride(2, 2)
- .build(),
- "layer2")
- .addLayer("layer4",
- new DenseLayer.Builder()
- .activation(Activation.RELU)
- .nOut(500).build(),
- "layer3")
- .addLayer("layer5",
- new DenseLayer.Builder()
- .activation(Activation.RELU)
- .nOut(250).build(),
- "layer4")
- .addLayer("layer6",
- new OutputLayer.Builder(
- LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
- .nOut(100)
- .activation(Activation.SOFTMAX)
- .build(),
- "layer5")
- .setOutputs("layer5").backprop(true).pretrain(false).build());
+ new NeuralNetConfiguration.Builder().seed(123).iterations(1)
+ .weightInit(WeightInit.XAVIER)
+ .updater(new Nesterovs(0.01, 0.9)).graphBuilder()
+ .addInputs("layer0In")
+ .setInputTypes(InputType.convolutionalFlat(28, 28,
+ 3))
+ .addLayer("layer0",
+ new ConvolutionLayer.Builder(5, 5).nIn(3)
+ .stride(1, 1).nOut(20)
+ .activation(Activation.IDENTITY)
+ .build(),
+ "layer0In")
+ .addLayer("layer1",
+ new SubsamplingLayer.Builder(
+ SubsamplingLayer.PoolingType.MAX)
+ .kernelSize(2, 2)
+ .stride(2, 2)
+ .build(),
+ "layer0")
+ .addLayer("layer2",
+ new ConvolutionLayer.Builder(5, 5).stride(1, 1)
+ .nOut(50)
+ .activation(Activation.IDENTITY)
+ .build(),
+ "layer1")
+ .addLayer("layer3",
+ new SubsamplingLayer.Builder(
+ SubsamplingLayer.PoolingType.MAX)
+ .kernelSize(2, 2)
+ .stride(2, 2)
+ .build(),
+ "layer2")
+ .addLayer("layer4",
+ new DenseLayer.Builder()
+ .activation(Activation.RELU)
+ .nOut(500).build(),
+ "layer3")
+ .addLayer("layer5",
+ new DenseLayer.Builder()
+ .activation(Activation.RELU)
+ .nOut(250).build(),
+ "layer4")
+ .addLayer("layer6",
+ new OutputLayer.Builder(
+ LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
+ .nOut(100)
+ .activation(Activation.SOFTMAX)
+ .build(),
+ "layer5")
+ .setOutputs("layer5").backprop(true).pretrain(false).build());
modelToFineTune.init();
//this will override the learning configuration set in the model
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().seed(456).learningRate(0.001)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
- FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().seed(456).learningRate(0.001)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().seed(456).updater(new Sgd(0.001));
+ FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().seed(456).updater(new Sgd(0.001))
.build();
ComputationGraph modelNow =
- new TransferLearning.GraphBuilder(modelToFineTune).fineTuneConfiguration(fineTuneConfiguration)
- .setFeatureExtractor("layer1").nOutReplace("layer4", 600, WeightInit.XAVIER)
- .removeVertexAndConnections("layer5").removeVertexAndConnections("layer6")
- .setInputs("layer0In").setInputTypes(InputType.convolutionalFlat(28, 28, 3))
- .addLayer("layer5",
- new DenseLayer.Builder().activation(Activation.RELU).nIn(600)
- .nOut(300).build(),
- "layer4")
- .addLayer("layer6",
- new DenseLayer.Builder().activation(Activation.RELU).nIn(300)
- .nOut(150).build(),
- "layer5")
- .addLayer("layer7",
- new DenseLayer.Builder().activation(Activation.RELU).nIn(150)
- .nOut(50).build(),
- "layer6")
- .addLayer("layer8",
- new OutputLayer.Builder(
- LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
- .activation(Activation.SOFTMAX)
- .nIn(50).nOut(10).build(),
- "layer7")
- .setOutputs("layer8").build();
+ new TransferLearning.GraphBuilder(modelToFineTune).fineTuneConfiguration(fineTuneConfiguration)
+ .setFeatureExtractor("layer1").nOutReplace("layer4", 600, WeightInit.XAVIER)
+ .removeVertexAndConnections("layer5").removeVertexAndConnections("layer6")
+ .setInputs("layer0In").setInputTypes(InputType.convolutionalFlat(28, 28, 3))
+ .addLayer("layer5",
+ new DenseLayer.Builder().activation(Activation.RELU).nIn(600)
+ .nOut(300).build(),
+ "layer4")
+ .addLayer("layer6",
+ new DenseLayer.Builder().activation(Activation.RELU).nIn(300)
+ .nOut(150).build(),
+ "layer5")
+ .addLayer("layer7",
+ new DenseLayer.Builder().activation(Activation.RELU).nIn(150)
+ .nOut(50).build(),
+ "layer6")
+ .addLayer("layer8",
+ new OutputLayer.Builder(
+ LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
+ .activation(Activation.SOFTMAX)
+ .nIn(50).nOut(10).build(),
+ "layer7")
+ .setOutputs("layer8").build();
ComputationGraph modelExpectedArch =
new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In")
- .setInputTypes(InputType.convolutionalFlat(28,
- 28, 3))
- .addLayer("layer0",
- new FrozenLayer(new ConvolutionLayer.Builder(5, 5).nIn(3)
- .stride(1, 1).nOut(20)
- .activation(Activation.IDENTITY).build()),
- "layer0In")
- .addLayer("layer1",
- new FrozenLayer(new SubsamplingLayer.Builder(
- SubsamplingLayer.PoolingType.MAX)
- .kernelSize(2, 2).stride(2, 2)
- .build()),
- "layer0")
- .addLayer("layer2",
- new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50)
- .activation(Activation.IDENTITY).build(),
- "layer1")
- .addLayer("layer3",
- new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
- .kernelSize(2, 2).stride(2, 2).build(),
- "layer2")
- .addLayer("layer4",
- new DenseLayer.Builder().activation(Activation.RELU).nOut(600)
- .build(),
- "layer3")
- .addLayer("layer5",
- new DenseLayer.Builder().activation(Activation.RELU).nOut(300)
- .build(),
- "layer4")
- .addLayer("layer6",
- new DenseLayer.Builder().activation(Activation.RELU).nOut(150)
- .build(),
- "layer5")
- .addLayer("layer7",
- new DenseLayer.Builder().activation(Activation.RELU).nOut(50)
- .build(),
- "layer6")
- .addLayer("layer8",
- new OutputLayer.Builder(
- LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
- .nOut(10)
- .activation(Activation.SOFTMAX)
- .build(),
- "layer7")
- .setOutputs("layer8").backprop(true).pretrain(false).build());
+ .setInputTypes(InputType.convolutionalFlat(28, 28, 3))
+ .addLayer("layer0",
+ new FrozenLayer(new ConvolutionLayer.Builder(5, 5).nIn(3)
+ .stride(1, 1).nOut(20)
+ .activation(Activation.IDENTITY).build()),
+ "layer0In")
+ .addLayer("layer1",
+ new FrozenLayer(new SubsamplingLayer.Builder(
+ SubsamplingLayer.PoolingType.MAX)
+ .kernelSize(2, 2).stride(2, 2)
+ .build()),
+ "layer0")
+ .addLayer("layer2",
+ new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50)
+ .activation(Activation.IDENTITY).build(),
+ "layer1")
+ .addLayer("layer3",
+ new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
+ .kernelSize(2, 2).stride(2, 2).build(),
+ "layer2")
+ .addLayer("layer4",
+ new DenseLayer.Builder().activation(Activation.RELU).nOut(600)
+ .build(),
+ "layer3")
+ .addLayer("layer5",
+ new DenseLayer.Builder().activation(Activation.RELU).nOut(300)
+ .build(),
+ "layer4")
+ .addLayer("layer6",
+ new DenseLayer.Builder().activation(Activation.RELU).nOut(150)
+ .build(),
+ "layer5")
+ .addLayer("layer7",
+ new DenseLayer.Builder().activation(Activation.RELU).nOut(50)
+ .build(),
+ "layer6")
+ .addLayer("layer8",
+ new OutputLayer.Builder(
+ LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
+ .nOut(10)
+ .activation(Activation.SOFTMAX)
+ .build(),
+ "layer7")
+ .setOutputs("layer8").backprop(true).pretrain(false).build());
modelExpectedArch.init();
modelExpectedArch.getVertex("layer0").setLayerAsFrozen();
modelExpectedArch.getVertex("layer1").setLayerAsFrozen();
@@ -380,11 +373,10 @@ public void testAllWithCNN() {
@Test
public void testTransferGlobalPool() {
- ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.ADAM)
- .adamMeanDecay(0.9).adamVarDecay(0.999).weightInit(WeightInit.XAVIER).learningRate(0.1)
+ ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new Adam(0.1))
+ .weightInit(WeightInit.XAVIER)
.graphBuilder().addInputs("in")
- .addLayer("blstm1",
- new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10)
+ .addLayer("blstm1",new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10)
.activation(Activation.TANH).build(),
"in")
.addLayer("pool", new GlobalPoolingLayer.Builder().build(), "blstm1")
@@ -397,19 +389,20 @@ public void testTransferGlobalPool() {
g.init();
FineTuneConfiguration fineTuneConfiguration =
- new FineTuneConfiguration.Builder().seed(12345).learningRate(0.01).build();
+ new FineTuneConfiguration.Builder().seed(12345).updater(new Sgd(0.01)).build();
ComputationGraph graph = new TransferLearning.GraphBuilder(g).fineTuneConfiguration(fineTuneConfiguration)
.removeVertexKeepConnections("out").setFeatureExtractor("dense")
- .addLayer("out", new OutputLayer.Builder().updater(Updater.ADAM).adamMeanDecay(0.9)
- .adamVarDecay(0.999).weightInit(WeightInit.XAVIER)
+ .addLayer("out", new OutputLayer.Builder().updater(new Adam(0.1))
+ .weightInit(WeightInit.XAVIER)
.activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT)
.nIn(10).nOut(5).build(), "dense")
.build();
ComputationGraphConfiguration confExpected = new NeuralNetConfiguration.Builder().seed(12345)
- .updater(Updater.ADAM).adamMeanDecay(0.9).adamVarDecay(0.999).weightInit(WeightInit.XAVIER)
- .learningRate(0.01).graphBuilder().addInputs("in")
+ .updater(new Sgd(0.01))
+ .weightInit(WeightInit.XAVIER)
+ .graphBuilder().addInputs("in")
.addLayer("blstm1",
new FrozenLayer(new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10)
.activation(Activation.TANH).build()),
@@ -417,12 +410,15 @@ public void testTransferGlobalPool() {
.addLayer("pool", new FrozenLayer(new GlobalPoolingLayer.Builder().build()), "blstm1")
.addLayer("dense", new FrozenLayer(new DenseLayer.Builder().nIn(10).nOut(10).build()), "pool")
.addLayer("out", new OutputLayer.Builder().nIn(10).nOut(5).activation(Activation.SOFTMAX)
+ .updater(new Adam(0.1))
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "dense")
.setOutputs("out").build();
ComputationGraph modelExpected = new ComputationGraph(confExpected);
modelExpected.init();
- assertEquals(confExpected, graph.getConfiguration());
+
+ // Direct object equality is not asserted here; the serialized JSON forms are compared instead:
+ assertEquals(confExpected.toJson(), graph.getConfiguration().toJson());
}
}
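
A recurring translation in this file: enum-plus-setter chains such as updater(Updater.NESTEROVS).momentum(0.99).learningRate(0.01) collapse into a single constructor call, and the final hunk compares configurations through their JSON form rather than direct object equality. The constructor mapping in brief (the getter names assume the usual Lombok-generated accessors on these config classes):

    import org.nd4j.linalg.learning.config.Nesterovs;
    import org.nd4j.linalg.learning.config.RmsProp;

    public class UpdaterConstructorMapping {
        public static void main(String[] args) {
            // Old: .updater(Updater.NESTEROVS).momentum(0.99).learningRate(0.01)
            Nesterovs n = new Nesterovs(0.01, 0.99);   // (learning rate, momentum)

            // Old: .updater(Updater.RMSPROP).learningRate(0.2)
            RmsProp r = new RmsProp(0.2);              // other hyperparameters keep their defaults

            System.out.println(n.getLearningRate() + " / " + n.getMomentum());
            System.out.println(r.getLearningRate());
        }
    }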
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java
index ccac20a7ddb1..809855503caf 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java
@@ -5,7 +5,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.graph.MergeVertex;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.BaseLayer;
@@ -18,6 +17,8 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.MultiDataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Adam;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import static org.junit.Assert.*;
@@ -36,8 +37,8 @@ public void testMergeAndFreeze() {
// (b) Test global override (should be selective)
- ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.ADAM)
- .learningRate(1e-4).activation(Activation.LEAKYRELU).graphBuilder().addInputs("in1", "in2")
+ ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Adam(1e-4))
+ .activation(Activation.LEAKYRELU).graphBuilder().addInputs("in1", "in2")
.addLayer("A", new DenseLayer.Builder().nIn(10).nOut(9).build(), "in1")
.addLayer("B", new DenseLayer.Builder().nIn(9).nOut(8).build(), "A")
.addLayer("C", new DenseLayer.Builder().nIn(7).nOut(6).build(), "in2")
@@ -59,8 +60,7 @@ public void testMergeAndFreeze() {
ComputationGraph graph2 =
new TransferLearning.GraphBuilder(graph)
- .fineTuneConfiguration(
- new FineTuneConfiguration.Builder().learningRate(2e-2).build())
+ .fineTuneConfiguration(new FineTuneConfiguration.Builder().updater(new Adam(2e-2)).build())
.setFeatureExtractor("C").build();
boolean cFound = false;
@@ -79,8 +79,7 @@ public void testMergeAndFreeze() {
//Also check config:
BaseLayer bl = ((BaseLayer) l.conf().getLayer());
- assertEquals(Updater.ADAM, bl.getUpdater());
- assertEquals(2e-2, bl.getLearningRate(), 1e-5);
+ assertEquals(new Adam(2e-2), bl.getIUpdater());
assertEquals(Activation.LEAKYRELU.getActivationFunction(), bl.getActivationFn());
}
assertTrue(cFound);
@@ -90,9 +89,9 @@ public void testMergeAndFreeze() {
@Test
public void testSimplerMergeBackProp() {
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.9)
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.9))
.activation(Activation.IDENTITY)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
/*
inCentre inRight
@@ -172,9 +171,8 @@ public void testSimplerMergeBackProp() {
@Test
public void testLessSimpleMergeBackProp() {
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.9)
- .activation(Activation.IDENTITY)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.9))
+ .activation(Activation.IDENTITY);
/*
inCentre inRight
@@ -239,9 +237,8 @@ public void testLessSimpleMergeBackProp() {
@Test
public void testAddOutput() {
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.9)
- .activation(Activation.IDENTITY)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.9))
+ .activation(Activation.IDENTITY);
ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight")
.addLayer("denseCentre0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "inCentre")
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java
index 02f20d466386..50dadb544bbe 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java
@@ -4,7 +4,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.graph.MergeVertex;
import org.deeplearning4j.nn.conf.graph.SubsetVertex;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
@@ -17,6 +16,7 @@
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.MultiDataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.List;
@@ -32,9 +32,9 @@ public class TransferLearningHelperTest {
@Test
public void tesUnfrozenSubset() {
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1).seed(124)
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().seed(124)
.activation(Activation.IDENTITY)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1));
/*
(inCentre) (inRight)
| |
@@ -114,9 +114,9 @@ public void tesUnfrozenSubset() {
@Test
public void testFitUnFrozen() {
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.9).seed(124)
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.9)).seed(124)
.activation(Activation.IDENTITY)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight")
.addLayer("denseCentre0", new DenseLayer.Builder().nIn(10).nOut(9).build(), "inCentre")
@@ -187,8 +187,8 @@ public void testFitUnFrozen() {
public void testMLN() {
DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3));
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.activation(Activation.IDENTITY);
MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(overallConf.clone().list()
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java
index 4bc7842d7486..e574b3658cb7 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java
@@ -2,7 +2,10 @@
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
-import org.deeplearning4j.nn.conf.*;
+import org.deeplearning4j.nn.conf.BackpropType;
+import org.deeplearning4j.nn.conf.GradientNormalization;
+import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
@@ -16,6 +19,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.*;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import static org.junit.Assert.*;
@@ -34,7 +38,7 @@ public void simpleFineTune() {
//original conf
NeuralNetConfiguration.Builder confToChange =
new NeuralNetConfiguration.Builder().seed(rng).optimizationAlgo(OptimizationAlgorithm.LBFGS)
- .updater(Updater.NESTEROVS).momentum(0.99).learningRate(0.01);
+ .updater(new Nesterovs(0.01, 0.99));
MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(confToChange.list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
@@ -46,24 +50,22 @@ public void simpleFineTune() {
//model after applying changes with transfer learning
MultiLayerNetwork modelNow =
- new TransferLearning.Builder(modelToFineTune)
- .fineTuneConfiguration(new FineTuneConfiguration.Builder().seed(rng)
- .optimizationAlgo(
- OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .updater(Updater.RMSPROP).learningRate(0.5) //Intent: override both weight and bias LR, unless bias LR is manually set also
- .l2(0.4).regularization(true).build())
- .build();
+ new TransferLearning.Builder(modelToFineTune)
+ .fineTuneConfiguration(new FineTuneConfiguration.Builder().seed(rng)
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
+ .updater(new RmsProp(0.5)) //Intent: override both weight and bias LR, unless bias LR is manually set also
+ .l2(0.4).build())
+ .build();
for (org.deeplearning4j.nn.api.Layer l : modelNow.getLayers()) {
BaseLayer bl = ((BaseLayer) l.conf().getLayer());
- assertEquals(Updater.RMSPROP, bl.getUpdater());
- assertEquals(0.5, bl.getLearningRate(), 1e-6);
+ assertEquals(new RmsProp(0.5), bl.getIUpdater());
}
NeuralNetConfiguration.Builder confSet = new NeuralNetConfiguration.Builder().seed(rng)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.RMSPROP)
- .learningRate(0.5).l2(0.4).regularization(true);
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
+ .updater(new RmsProp(0.5)).l2(0.4);
MultiLayerNetwork expectedModel = new MultiLayerNetwork(confSet.list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
@@ -94,10 +96,8 @@ public void simpleFineTune() {
public void testNoutChanges() {
DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 2));
- NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
- FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().learningRate(0.1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
+ NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1));
+ FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.1))
.build();
MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(equivalentConf.list()
@@ -155,11 +155,8 @@ public void testNoutChanges() {
public void testRemoveAndAdd() {
DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3));
- NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
- FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().learningRate(0.1)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
- .build();
+ NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1));
+ FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build();
MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(//overallConf.list()
equivalentConf.list().layer(0, new DenseLayer.Builder().nIn(4).nOut(5).build())
@@ -177,7 +174,7 @@ public void testRemoveAndAdd() {
.nOutReplace(0, 7, WeightInit.XAVIER, WeightInit.XAVIER)
.nOutReplace(2, 5, WeightInit.XAVIER).removeOutputLayer()
.addLayer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5)
- .nOut(3).learningRate(0.5).activation(Activation.SOFTMAX)
+ .nOut(3).updater(new Sgd(0.5)).activation(Activation.SOFTMAX)
.build())
.build();
@@ -187,7 +184,7 @@ public void testRemoveAndAdd() {
.layer(2, new DenseLayer.Builder().nIn(2).nOut(5).build())
.layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX)
- .learningRate(0.5).nIn(5).nOut(3).build())
+ .updater(new Sgd(0.5)).nIn(5).nOut(3).build())
.build());
modelExpectedArch.init();
@@ -214,34 +211,29 @@ public void testRemoveAndProcessing() {
int V_NFRAMES = 150;
MultiLayerConfiguration confForArchitecture =
- new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l2(0.001) //l2 regularization on all layers
+ new NeuralNetConfiguration.Builder().seed(12345).l2(0.001) //l2 regularization on all layers
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .iterations(1).learningRate(0.4).list()
+ .iterations(1).updater(new AdaGrad(0.4)).list()
.layer(0, new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB
.nOut(30).stride(4, 4).activation(Activation.RELU).weightInit(
- WeightInit.RELU)
- .updater(Updater.ADAGRAD).build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30
+ WeightInit.RELU).build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30
.layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
.kernelSize(3, 3).stride(2, 2).build()) //(31-3+0)/2+1 = 15
.layer(2, new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2)
.activation(Activation.RELU).weightInit(WeightInit.RELU)
- .updater(Updater.ADAGRAD).build()) //Output: (15-3+0)/2+1 = 7 -> 7*7*10 = 490
+ .build()) //Output: (15-3+0)/2+1 = 7 -> 7*7*10 = 490
.layer(3, new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50)
- .weightInit(WeightInit.RELU).updater(Updater.ADAGRAD)
- .gradientNormalization(
- GradientNormalization.ClipElementWiseAbsoluteValue)
- .gradientNormalizationThreshold(10).learningRate(0.5).build())
+ .weightInit(WeightInit.RELU).updater(new AdaGrad(0.5))
+ .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
+ .gradientNormalizationThreshold(10).build())
.layer(4, new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50)
- .nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD)
- .gradientNormalization(
- GradientNormalization.ClipElementWiseAbsoluteValue)
- .gradientNormalizationThreshold(10).learningRate(0.6)
- .build())
+ .nOut(50).weightInit(WeightInit.XAVIER).updater(new AdaGrad(0.6))
+ .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
+ .gradientNormalizationThreshold(10).build())
.layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(50).nOut(4) //4 possible shapes: circle, square, arc, line
- .updater(Updater.ADAGRAD).weightInit(WeightInit.XAVIER)
- .gradientNormalization(
- GradientNormalization.ClipElementWiseAbsoluteValue)
+ .weightInit(WeightInit.XAVIER)
+ .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
.gradientNormalizationThreshold(10).build())
.inputPreProcessor(0, new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3))
.inputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10))
@@ -254,18 +246,14 @@ public void testRemoveAndProcessing() {
MultiLayerNetwork modelToTweak =
new MultiLayerNetwork(
new NeuralNetConfiguration.Builder().seed(12345)
- //.regularization(true).l2(0.001) //change l2
- .optimizationAlgo(
- OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .iterations(1).learningRate(0.1) //change learning rate
- .updater(Updater.RMSPROP)// change updater
+ .iterations(1).updater(new RmsProp(0.1))
.list()
.layer(0, new ConvolutionLayer.Builder(10, 10) //Only keep the first layer the same
.nIn(3) //3 channels: RGB
.nOut(30).stride(4, 4)
.activation(Activation.RELU)
.weightInit(WeightInit.RELU)
- .updater(Updater.ADAGRAD).build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30
+ .updater(new AdaGrad(0.1)).build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30
.layer(1, new SubsamplingLayer.Builder(
SubsamplingLayer.PoolingType.MAX) //change kernel size
.kernelSize(5, 5).stride(2, 2)
@@ -280,7 +268,7 @@ public void testRemoveAndProcessing() {
.gradientNormalization(
GradientNormalization.ClipElementWiseAbsoluteValue)
.gradientNormalizationThreshold(10)
- .learningRate(0.01).build())
+ .updater(new RmsProp(0.01)).build())
.layer(4, new GravesLSTM.Builder() //change here
.activation(Activation.SOFTSIGN).nIn(50)
.nOut(25).weightInit(WeightInit.XAVIER)
@@ -290,15 +278,11 @@ public void testRemoveAndProcessing() {
.activation(Activation.SOFTMAX)
.nIn(25).nOut(4)
.weightInit(WeightInit.XAVIER)
- .gradientNormalization(
- GradientNormalization.ClipElementWiseAbsoluteValue)
- .gradientNormalizationThreshold(
- 10)
+ .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
+ .gradientNormalizationThreshold(10)
.build())
- .inputPreProcessor(0,
- new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3))
- .inputPreProcessor(3,
- new CnnToFeedForwardPreProcessor(5, 5, 10))
+                                                        .inputPreProcessor(0, new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3))
+                                                        .inputPreProcessor(3, new CnnToFeedForwardPreProcessor(5, 5, 10))
.inputPreProcessor(4, new FeedForwardToRnnPreProcessor())
.pretrain(false).backprop(true)
.backpropType(BackpropType.TruncatedBPTT)
@@ -308,28 +292,25 @@ public void testRemoveAndProcessing() {
MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToTweak)
.fineTuneConfiguration(
- new FineTuneConfiguration.Builder().seed(12345).regularization(true).l2(0.001) //l2 regularization on all layers
- .optimizationAlgo(
- OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .updater(Updater.ADAGRAD).weightInit(WeightInit.RELU)
- .iterations(1).learningRate(0.4).build())
+ new FineTuneConfiguration.Builder().seed(12345).l2(0.001) //l2 regularization on all layers
+ .updater(new AdaGrad(0.4))
+ .weightInit(WeightInit.RELU).build())
.removeLayersFromOutput(5)
.addLayer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3, 3)
.stride(2, 2).build())
.addLayer(new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2)
- .activation(Activation.RELU).weightInit(WeightInit.RELU)
- .updater(Updater.ADAGRAD).build())
+ .activation(Activation.RELU).weightInit(WeightInit.RELU).build())
.addLayer(new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50)
- .weightInit(WeightInit.RELU).updater(Updater.ADAGRAD)
+ .weightInit(WeightInit.RELU).updater(new AdaGrad(0.5))
.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
- .gradientNormalizationThreshold(10).learningRate(0.5).build())
+ .gradientNormalizationThreshold(10).build())
.addLayer(new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50).nOut(50)
- .weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD)
+ .weightInit(WeightInit.XAVIER).updater(new AdaGrad(0.6))
.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
- .gradientNormalizationThreshold(10).learningRate(0.6).build())
+ .gradientNormalizationThreshold(10).build())
.addLayer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(50).nOut(4) //4 possible shapes: circle, square, arc, line
- .updater(Updater.ADAGRAD).weightInit(WeightInit.XAVIER)
+ .weightInit(WeightInit.XAVIER)
.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
.gradientNormalizationThreshold(10).build())
.setInputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10))
@@ -366,12 +347,9 @@ public void testAllWithCNN() {
DataSet randomData = new DataSet(Nd4j.rand(10, 28 * 28 * 3).reshape(10, 3, 28, 28), Nd4j.rand(10, 10));
MultiLayerNetwork modelToFineTune =
new MultiLayerNetwork(
- new NeuralNetConfiguration.Builder().seed(123).iterations(1).learningRate(.01)
+ new NeuralNetConfiguration.Builder().seed(123)
.weightInit(WeightInit.XAVIER)
- .optimizationAlgo(
- OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .updater(Updater.NESTEROVS).momentum(
- 0.9)
+ .updater(new Nesterovs(0.01, 0.9))
.list()
.layer(0, new ConvolutionLayer.Builder(5, 5).nIn(3).stride(1, 1)
.nOut(20).activation(Activation.IDENTITY)
@@ -401,11 +379,10 @@ public void testAllWithCNN() {
modelToFineTune.init();
INDArray asFrozenFeatures = modelToFineTune.feedForwardToLayer(2, randomData.getFeatures(), false).get(2); //10x20x12x12
- NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().learningRate(0.2)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
+ NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.2))
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
- FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().learningRate(0.2)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
+ FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.2))
.build();
MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToFineTune).fineTuneConfiguration(overallConf)
@@ -466,8 +443,8 @@ public void testFineTuneOverride() {
//Check that fine-tune overrides are selective - i.e., if I only specify a new LR, only the LR should be modified
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(1e-4).updater(Updater.ADAM)
- .activation(Activation.TANH).weightInit(WeightInit.RELU).regularization(true)
+ new NeuralNetConfiguration.Builder().updater(new Adam(1e-4))
+ .activation(Activation.TANH).weightInit(WeightInit.RELU)
.l1(0.1).l2(0.2).list()
.layer(0, new DenseLayer.Builder().nIn(10).nOut(5).build()).layer(1,
new OutputLayer.Builder().nIn(5).nOut(4)
@@ -478,7 +455,7 @@ public void testFineTuneOverride() {
net.init();
MultiLayerNetwork net2 = new TransferLearning.Builder(net)
- .fineTuneConfiguration(new FineTuneConfiguration.Builder().learningRate(2e-2) //Should be set on layers
+ .fineTuneConfiguration(new FineTuneConfiguration.Builder().updater(new Adam(2e-2))
.backpropType(BackpropType.TruncatedBPTT) //Should be set on MLC
.build())
.build();
@@ -486,16 +463,14 @@ public void testFineTuneOverride() {
//Check original net isn't modified:
BaseLayer l0 = (BaseLayer) net.getLayer(0).conf().getLayer();
- assertEquals(Updater.ADAM, l0.getUpdater());
+ assertEquals(new Adam(1e-4), l0.getIUpdater());
assertEquals(Activation.TANH.getActivationFunction(), l0.getActivationFn());
- assertEquals(1e-4, l0.getLearningRate(), 1e-8);
assertEquals(WeightInit.RELU, l0.getWeightInit());
assertEquals(0.1, l0.getL1(), 1e-6);
BaseLayer l1 = (BaseLayer) net.getLayer(1).conf().getLayer();
- assertEquals(Updater.ADAM, l1.getUpdater());
+ assertEquals(new Adam(1e-4), l1.getIUpdater());
assertEquals(Activation.HARDSIGMOID.getActivationFunction(), l1.getActivationFn());
- assertEquals(1e-4, l1.getLearningRate(), 1e-8);
assertEquals(WeightInit.RELU, l1.getWeightInit());
assertEquals(0.2, l1.getL2(), 1e-6);
@@ -503,16 +478,14 @@ public void testFineTuneOverride() {
//Check new net has only the appropriate things modified (i.e., LR)
l0 = (BaseLayer) net2.getLayer(0).conf().getLayer();
- assertEquals(Updater.ADAM, l0.getUpdater());
+ assertEquals(new Adam(2e-2), l0.getIUpdater());
assertEquals(Activation.TANH.getActivationFunction(), l0.getActivationFn());
- assertEquals(2e-2, l0.getLearningRate(), 1e-8);
assertEquals(WeightInit.RELU, l0.getWeightInit());
assertEquals(0.1, l0.getL1(), 1e-6);
l1 = (BaseLayer) net2.getLayer(1).conf().getLayer();
- assertEquals(Updater.ADAM, l1.getUpdater());
+ assertEquals(new Adam(2e-2), l1.getIUpdater());
assertEquals(Activation.HARDSIGMOID.getActivationFunction(), l1.getActivationFn());
- assertEquals(2e-2, l1.getLearningRate(), 1e-8);
assertEquals(WeightInit.RELU, l1.getWeightInit());
assertEquals(0.2, l1.getL2(), 1e-6);
@@ -525,12 +498,9 @@ public void testAllWithCNNNew() {
DataSet randomData = new DataSet(Nd4j.rand(10, 28 * 28 * 3).reshape(10, 3, 28, 28), Nd4j.rand(10, 10));
MultiLayerNetwork modelToFineTune =
new MultiLayerNetwork(
- new NeuralNetConfiguration.Builder().seed(123).iterations(1).learningRate(.01)
+ new NeuralNetConfiguration.Builder().seed(123).iterations(1)
.weightInit(WeightInit.XAVIER)
- .optimizationAlgo(
- OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .updater(Updater.NESTEROVS).momentum(
- 0.9)
+ .updater(new Nesterovs(0.01, 0.9))
.list()
.layer(0, new ConvolutionLayer.Builder(5, 5).nIn(3).stride(1, 1)
.nOut(20).activation(Activation.IDENTITY)
@@ -560,11 +530,8 @@ public void testAllWithCNNNew() {
modelToFineTune.init();
INDArray asFrozenFeatures = modelToFineTune.feedForwardToLayer(2, randomData.getFeatures(), false).get(2); //10x20x12x12
- NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().learningRate(0.2)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
- FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().learningRate(0.2)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
- .build();
+ NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.2));
+ FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.2)).build();
MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToFineTune).fineTuneConfiguration(overallConf)
.setFeatureExtractor(1).removeLayersFromOutput(5)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestDecayPolicies.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestDecayPolicies.java
deleted file mode 100644
index 5d35059eca57..000000000000
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestDecayPolicies.java
+++ /dev/null
@@ -1,844 +0,0 @@
-package org.deeplearning4j.nn.updater;
-
-import org.apache.commons.math3.util.FastMath;
-import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
-import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
-import org.deeplearning4j.nn.api.Layer;
-import org.deeplearning4j.nn.api.OptimizationAlgorithm;
-import org.deeplearning4j.nn.api.Updater;
-import org.deeplearning4j.nn.conf.LearningRatePolicy;
-import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
-import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
-import org.deeplearning4j.nn.conf.layers.BaseLayer;
-import org.deeplearning4j.nn.conf.layers.DenseLayer;
-import org.deeplearning4j.nn.conf.layers.OutputLayer;
-import org.deeplearning4j.nn.gradient.DefaultGradient;
-import org.deeplearning4j.nn.gradient.Gradient;
-import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
-import org.deeplearning4j.nn.params.DefaultParamInitializer;
-import org.deeplearning4j.nn.weights.WeightInit;
-import org.deeplearning4j.optimize.api.ConvexOptimizer;
-import org.deeplearning4j.optimize.solvers.StochasticGradientDescent;
-import org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction;
-import org.junit.Before;
-import org.junit.Test;
-import org.nd4j.linalg.activations.Activation;
-import org.nd4j.linalg.api.ndarray.INDArray;
-import org.nd4j.linalg.dataset.DataSet;
-import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
-import org.nd4j.linalg.factory.Nd4j;
-import org.nd4j.linalg.indexing.NDArrayIndex;
-import org.nd4j.linalg.learning.config.AdaGrad;
-import org.nd4j.linalg.learning.config.Adam;
-import org.nd4j.linalg.learning.config.Nesterovs;
-import org.nd4j.linalg.learning.config.RmsProp;
-import org.nd4j.linalg.lossfunctions.LossFunctions;
-import org.nd4j.linalg.ops.transforms.Transforms;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import static org.junit.Assert.assertEquals;
-
-/**
- * Test learning rate and momentum decay policies
- */
-
-
-public class TestDecayPolicies {
-
- int nIn = 3;
- int nOut = 2;
- double epsilon = 1e-8;
- INDArray gradient;
- INDArray weightGradient; // = Nd4j.ones(nIn, nOut);
- INDArray biasGradient; // = Nd4j.ones(1, nOut);
- DefaultGradient gradientSingle = new DefaultGradient();
- DefaultGradient gradientMLN = new DefaultGradient();
- INDArray val, gradExpected, vPrev;
- String key;
-    Map<String, INDArray> tmpStorage, tmpStorage2, tmpStorage3, tmpStorage4 = new HashMap<>();
- org.deeplearning4j.nn.conf.Updater[] updaters = {org.deeplearning4j.nn.conf.Updater.SGD,
- org.deeplearning4j.nn.conf.Updater.ADAGRAD, org.deeplearning4j.nn.conf.Updater.ADAM,
- org.deeplearning4j.nn.conf.Updater.RMSPROP, org.deeplearning4j.nn.conf.Updater.ADAMAX};
-
- @Before
- public void beforeDo() {
- Nd4j.getRandom().setSeed(12345);
- int nLayers = 2;
- String wKey, bKey;
-
- gradient = Nd4j.ones(1, nIn * nOut + nOut);
- gradient.addi(Nd4j.rand(gradient.shape()));
- weightGradient = gradient.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nIn * nOut));
- biasGradient = gradient.get(NDArrayIndex.point(0), NDArrayIndex.interval(nIn * nOut, nIn * nOut + nOut));
-
- gradientSingle.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
- gradientSingle.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
- gradientSingle.setFlattenedGradient(gradient);
-
- for (int j = 0; j < nLayers; j++) {
- wKey = String.valueOf(j) + "_" + DefaultParamInitializer.WEIGHT_KEY;
- gradientMLN.setGradientFor(wKey, weightGradient.dup());
- bKey = String.valueOf(j) + "_" + DefaultParamInitializer.BIAS_KEY;
- gradientMLN.setGradientFor(bKey, biasGradient.dup());
- }
-
- val = null;
- gradExpected = null;
- vPrev = null;
- tmpStorage = new HashMap<>();
- tmpStorage2 = new HashMap<>();
- tmpStorage3 = new HashMap<>();
- tmpStorage4 = new HashMap<>();
-
- }
-
- @Test
- public void testLearningRateExponentialDecaySingleLayer() {
- int iterations = 2;
-
- double lr = 1e-2;
- double decayRate = 2;
- NeuralNetConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(lr)
- .learningRateDecayPolicy(LearningRatePolicy.Exponential)
- .lrPolicyDecayRate(decayRate).iterations(iterations)
- .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
- .build();
-
- int numParams = conf.getLayer().initializer().numParams(conf);
- INDArray params = Nd4j.create(1, numParams);
- Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
- layer.setBackpropGradientsViewArray(Nd4j.create(params.shape()));
- Updater updater = UpdaterCreator.getUpdater(layer);
-
- Gradient gradientActual = new DefaultGradient();
- gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
- gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
- for (int i = 0; i < iterations; i++) {
- updater.update(layer, gradientActual, i, 1);
- double expectedLr = calcExponentialDecay(lr, decayRate, i);
- assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
- assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
- }
- }
-
-
- @Test
- public void testLearningRateInverseDecaySingleLayer() {
- int iterations = 2;
-
- double lr = 1e-2;
- double decayRate = 2;
- double power = 3;
- NeuralNetConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(lr)
- .learningRateDecayPolicy(LearningRatePolicy.Inverse)
- .lrPolicyDecayRate(decayRate).lrPolicyPower(power).iterations(iterations)
- .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
- .build();
-
- int numParams = conf.getLayer().initializer().numParams(conf);
- INDArray params = Nd4j.create(1, numParams);
- Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
- layer.setBackpropGradientsViewArray(Nd4j.create(params.shape()));
- Updater updater = UpdaterCreator.getUpdater(layer);
-
- Gradient gradientActual = new DefaultGradient();
- gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
- gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
-
- for (int i = 0; i < iterations; i++) {
- updater.update(layer, gradientActual, i, 1);
- double expectedLr = calcInverseDecay(lr, decayRate, i, power);
- assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
- assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
- }
- }
-
- @Test
- public void testLearningRateStepDecaySingleLayer() {
- int iterations = 2;
-
- double lr = 1e-2;
- double decayRate = 2;
- double steps = 3;
- NeuralNetConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(lr)
- .learningRateDecayPolicy(LearningRatePolicy.Step).lrPolicyDecayRate(decayRate)
- .lrPolicySteps(steps).iterations(iterations)
- .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
- .build();
-
- int numParams = conf.getLayer().initializer().numParams(conf);
- INDArray params = Nd4j.create(1, numParams);
- Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
- layer.setBackpropGradientsViewArray(Nd4j.create(params.shape()));
- Updater updater = UpdaterCreator.getUpdater(layer);
-
- Gradient gradientActual = new DefaultGradient();
- gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
- gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
-
- for (int i = 0; i < iterations; i++) {
- updater.update(layer, gradientActual, i, 1);
- double expectedLr = calcStepDecay(lr, decayRate, i, steps);
- assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
- assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
- }
- }
-
-
- @Test
- public void testLearningRateTorchStepDecaySingleLayer() {
- int iterations = 20;
-
- double lr = 1;
- double decayRate = .5;
- double steps = 10;
- NeuralNetConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(lr)
- .learningRateDecayPolicy(LearningRatePolicy.TorchStep)
- .lrPolicyDecayRate(decayRate).lrPolicySteps(steps).iterations(iterations)
- .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
- .build();
-
- int numParams = conf.getLayer().initializer().numParams(conf);
- INDArray params = Nd4j.create(1, numParams);
- Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
- layer.setBackpropGradientsViewArray(Nd4j.create(params.shape()));
- Updater updater = UpdaterCreator.getUpdater(layer);
-
- Gradient gradientActual = new DefaultGradient();
- gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
- gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
-
- double expectedLr = lr;
- for (int i = 0; i < iterations; i++) {
- updater.update(layer, gradientActual, i, 1);
- if (i > 1 && steps % i == 0)
- expectedLr = calcTorchStepDecay(expectedLr, decayRate);
- assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
- assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
- }
- }
-
- @Test
- public void testLearningRatePolyDecaySingleLayer() {
- int iterations = 2;
- double lr = 1e-2;
- double power = 3;
- NeuralNetConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(lr)
- .learningRateDecayPolicy(LearningRatePolicy.Poly).lrPolicyPower(power)
- .iterations(iterations)
- .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
- .build();
-
- int numParams = conf.getLayer().initializer().numParams(conf);
- INDArray params = Nd4j.create(1, numParams);
- Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
- layer.setBackpropGradientsViewArray(Nd4j.create(params.shape()));
- Updater updater = UpdaterCreator.getUpdater(layer);
-
- Gradient gradientActual = new DefaultGradient();
- gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
- gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
-
- for (int i = 0; i < iterations; i++) {
- updater.update(layer, gradientActual, i, 1);
- double expectedLr = calcPolyDecay(lr, i, power, iterations);
- assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
- assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
- }
- }
-
-
- @Test
- public void testLearningRateSigmoidDecaySingleLayer() {
- int iterations = 2;
- double lr = 1e-2;
- double decayRate = 2;
- double steps = 3;
-
- NeuralNetConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(lr)
- .learningRateDecayPolicy(LearningRatePolicy.Sigmoid)
- .lrPolicyDecayRate(decayRate).lrPolicySteps(steps).iterations(iterations)
- .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
- .build();
-
- int numParams = conf.getLayer().initializer().numParams(conf);
- INDArray params = Nd4j.create(1, numParams);
- Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
- layer.setBackpropGradientsViewArray(Nd4j.create(params.shape()));
- Updater updater = UpdaterCreator.getUpdater(layer);
-
- Gradient gradientActual = new DefaultGradient();
- gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
- gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
-
- for (int i = 0; i < iterations; i++) {
- updater.update(layer, gradientActual, i, 1);
- double expectedLr = calcSigmoidDecay(layer.conf().getLearningRateByParam("W"), decayRate, i, steps);
- assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
- assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
- }
- }
-
-
- @Test
- public void testLearningRateScheduleSingleLayer() {
-        Map<Integer, Double> learningRateAfter = new HashMap<>();
- learningRateAfter.put(1, 0.2);
- int iterations = 2;
-
- for (org.deeplearning4j.nn.conf.Updater updaterFunc : updaters) {
- beforeDo();
-
- gradient.assign(1);
-
- double lr = 1e-2;
- NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
- .learningRateSchedule(learningRateAfter)
- .learningRateDecayPolicy(LearningRatePolicy.Schedule).iterations(iterations)
- .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).updater(updaterFunc).build()).build();
-
- int numParams = conf.getLayer().initializer().numParams(conf);
- INDArray params = Nd4j.create(1, numParams);
- Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
- layer.setBackpropGradientsViewArray(gradient);
- Updater updater = UpdaterCreator.getUpdater(layer);
- int stateSize = (int) ((BaseLayer) layer.conf().getLayer()).getIUpdater().stateSize(numParams);
- if (stateSize > 0)
- updater.setStateViewArray(layer, Nd4j.create(1, stateSize), true);
-
- Gradient gradientActual = new DefaultGradient(gradient);
- gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
- gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
-
- Gradient gradientExpected = new DefaultGradient();
- gradientExpected.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
- gradientExpected.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
-
- for (int i = 0; i < 2; i++) {
- updater.update(layer, gradientActual, i, 1);
-
- if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.SGD))
- lr = testSGDComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
- else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAGRAD))
- lr = testAdaGradComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
- else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAM))
- lr = testAdamComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
- else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.RMSPROP))
- lr = testRMSPropComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
- else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAMAX))
- lr = testAdaMaxComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
- assertEquals(lr, layer.conf().getLearningRateByParam("W"), 1e-4);
- }
- }
- }
-
-
- @Test
- public void testLearningRateScheduleMLN() {
-        Map<Integer, Double> learningRateAfter = new HashMap<>();
- learningRateAfter.put(1, 0.2);
- int iterations = 2;
- int[] nIns = {4, 2};
- int[] nOuts = {2, 3};
-
- for (org.deeplearning4j.nn.conf.Updater updaterFunc : updaters) {
- beforeDo();
-
- double lr = 1e-2;
-
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
- .learningRateDecayPolicy(LearningRatePolicy.Schedule)
- .learningRateSchedule(learningRateAfter).iterations(iterations).updater(updaterFunc).list()
- .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]).build())
- .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1]).build()).backprop(true)
- .pretrain(false).build();
-
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
-
- Updater updater = UpdaterCreator.getUpdater(net);
-
- INDArray gradViewArr = net.getFlattenedGradients();
-
- String wKey, bKey;
-
- for (int i = 0; i < 2; i++) {
- Gradient gradientActual = new DefaultGradient();
- Gradient gradientExpected = new DefaultGradient();
- int paramsSoFar = 0;
- for (int k = 0; k < net.getnLayers(); k++) {
- int nParams = net.getLayer(k).numParams();
- INDArray g = gradViewArr.get(NDArrayIndex.point(0),
- NDArrayIndex.interval(paramsSoFar, paramsSoFar + nParams));
- int nW = nIns[k] * nOuts[k];
- int nB = nOuts[k];
- INDArray gw = g.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nW));
- INDArray gb = g.get(NDArrayIndex.point(0), NDArrayIndex.interval(nW, nW + nB));
- wKey = String.valueOf(k) + "_" + DefaultParamInitializer.WEIGHT_KEY;
- gradientActual.setGradientFor(wKey, gw);
- gradientExpected.setGradientFor(wKey, gw.dup());
- bKey = String.valueOf(k) + "_" + DefaultParamInitializer.BIAS_KEY;
- gradientActual.setGradientFor(bKey, gb);
- gradientExpected.setGradientFor(bKey, gb.dup());
-
- paramsSoFar += nParams;
- }
-
- updater.update(net, gradientActual, i, 1);
- if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.SGD))
- lr = testSGDComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
- else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAGRAD))
- lr = testAdaGradComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
- else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAM))
- lr = testAdamComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
- else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.RMSPROP))
- lr = testRMSPropComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
-
- if (i == 0)
- assertEquals(lr, net.getLayer(1).conf().getLearningRateByParam("W"), lr);
- else
- assertEquals(lr, net.getLayer(1).conf().getLearningRateByParam("W"), learningRateAfter.get(1));
- }
- }
- }
-
- @Test
- public void testLearningRateScoreDecay() {
- double lr = 0.01;
- double lrScoreDecay = 0.10;
- int[] nIns = {4, 2};
- int[] nOuts = {2, 3};
- int oldScore = 1;
- int newScore = 1;
- int iteration = 3;
- INDArray gradientW = Nd4j.ones(nIns[0], nOuts[0]);
-
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
- .learningRateDecayPolicy(LearningRatePolicy.Score).lrPolicyDecayRate(lrScoreDecay).list()
- .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0])
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
- .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1])
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
- .backprop(true).pretrain(false).build();
-
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
-
- ConvexOptimizer opt = new StochasticGradientDescent(net.getDefaultConfiguration(),
- new NegativeDefaultStepFunction(), null, net);
- opt.checkTerminalConditions(gradientW, oldScore, newScore, iteration);
- assertEquals(lrScoreDecay, net.getLayer(0).conf().getLrPolicyDecayRate(), 1e-4);
- assertEquals(lr * (lrScoreDecay + Nd4j.EPS_THRESHOLD), net.getLayer(0).conf().getLearningRateByParam("W"),
- 1e-4);
-
- }
-
- @Test
- public void testOriginalLearningRateUnchanged() {
- // Confirm learning rate is unchanged while hash is updated
-
- DataSet ds = new IrisDataSetIterator(150, 150).next();
- ds.normalizeZeroMeanZeroUnitVariance();
-
- Nd4j.getRandom().setSeed(12345);
-
- MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().regularization(false)
- .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).learningRate(1.0)
- .learningRateDecayPolicy(LearningRatePolicy.Score).lrPolicyDecayRate(0.10)
- .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).seed(12345L).list()
- .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.SIGMOID)
- .build())
- .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE)
- .activation(Activation.TANH).nIn(3).nOut(3).build())
- .pretrain(false).backprop(true).build();
- MultiLayerNetwork mln = new MultiLayerNetwork(conf);
- mln.init();
-
- //Run a number of iterations of learning
- mln.setInput(ds.getFeatureMatrix());
- mln.setLabels(ds.getLabels());
- mln.computeGradientAndScore();
- for (int j = 0; j < 1; j++)
- mln.fit(ds);
- mln.computeGradientAndScore();
-
- double lr0 = ((BaseLayer) mln.getLayer(0).conf().getLayer()).getLearningRate();
- double lr1 = ((BaseLayer) mln.getLayer(1).conf().getLayer()).getLearningRate();
- assertEquals(1.0, lr0, 0.0);
- assertEquals(1.0, lr1, 0.0);
- }
-
- @Test
- public void testMomentumScheduleSingleLayer() {
- double lr = 1e-2;
- double mu = 0.9;
-        Map<Integer, Double> momentumAfter = new HashMap<>();
- momentumAfter.put(1, 0.2);
- int iterations = 2;
-
- NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(mu)
- .momentumAfter(momentumAfter).iterations(iterations).layer(new DenseLayer.Builder().nIn(nIn)
- .nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
- .build();
-
- int numParams = conf.getLayer().initializer().numParams(conf);
- INDArray params = Nd4j.create(1, numParams);
- Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
- layer.setBackpropGradientsViewArray(gradient);
- Updater updater = UpdaterCreator.getUpdater(layer);
-
- Gradient gradientExpected = new DefaultGradient();
- gradientExpected.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
- gradientExpected.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
-
- for (int i = 0; i < 2; i++) {
- updater.update(layer, gradientSingle, i, 1);
- mu = testNesterovsComputation(gradientSingle, gradientExpected, lr, mu, momentumAfter, i);
- assertEquals(mu, ((BaseLayer) layer.conf().getLayer()).getMomentum(), 1e-4);
- }
- }
-
- @Test
- public void testMomentumScheduleMLN() {
- double lr = 1e-2;
- double mu = 0.6;
-        Map<Integer, Double> momentumAfter = new HashMap<>();
- momentumAfter.put(1, 0.2);
- int iterations = 2;
- int[] nIns = {4, 2};
- int[] nOuts = {2, 3};
-
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(mu)
- .momentumAfter(momentumAfter).iterations(iterations).list()
- .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0])
- .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
- .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1])
- .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
- .backprop(true).pretrain(false).build();
-
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
-
- Updater updater = UpdaterCreator.getUpdater(net);
- int stateSize = (int) new Nesterovs().stateSize(net.numParams());
- updater.setStateViewArray(net, Nd4j.create(1, stateSize), true);
-
- String wKey, bKey;
-
- Gradient gradientMLN = new DefaultGradient();
- INDArray gradViewArr = net.getGradientsViewArray();
- int paramsSoFar = 0;
- for (int j = 0; j < 2; j++) {
- int nParams = net.getLayer(j).numParams();
- INDArray g = gradViewArr.get(NDArrayIndex.point(0),
- NDArrayIndex.interval(paramsSoFar, paramsSoFar + nParams));
- int nW = nIns[j] * nOuts[j];
- int nB = nOuts[j];
- INDArray gw = g.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nW));
- INDArray gb = g.get(NDArrayIndex.point(0), NDArrayIndex.interval(nW, nW + nB));
- wKey = String.valueOf(j) + "_" + DefaultParamInitializer.WEIGHT_KEY;
- gradientMLN.setGradientFor(wKey, gw);
- bKey = String.valueOf(j) + "_" + DefaultParamInitializer.BIAS_KEY;
- gradientMLN.setGradientFor(bKey, gb);
- paramsSoFar += nParams;
- }
-
- Gradient gradientExpected = new DefaultGradient();
- gradViewArr = gradViewArr.dup();
- paramsSoFar = 0;
- for (int j = 0; j < net.getnLayers(); j++) {
- int nParams = net.getLayer(j).numParams();
- INDArray g = gradViewArr.get(NDArrayIndex.point(0),
- NDArrayIndex.interval(paramsSoFar, paramsSoFar + nParams));
- int nW = nIns[j] * nOuts[j];
- int nB = nOuts[j];
- INDArray gw = g.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nW));
- INDArray gb = g.get(NDArrayIndex.point(0), NDArrayIndex.interval(nW, nW + nB));
- wKey = String.valueOf(j) + "_" + DefaultParamInitializer.WEIGHT_KEY;
- gradientExpected.setGradientFor(wKey, gw);
- bKey = String.valueOf(j) + "_" + DefaultParamInitializer.BIAS_KEY;
- gradientExpected.setGradientFor(bKey, gb);
- }
-
-
-
- for (int i = 0; i < 2; i++) {
- updater.update(net, gradientMLN, i, 1);
- mu = testNesterovsComputation(gradientMLN, gradientExpected, lr, mu, momentumAfter, i);
- assertEquals(mu, ((BaseLayer) net.getLayer(1).conf().getLayer()).getMomentum(), 1e-4);
- }
- }
-
-
- @Test
- public void testUpdatingInConf() throws Exception {
-
- OptimizationAlgorithm[] optimizationAlgorithms = new OptimizationAlgorithm[] {
- OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT, OptimizationAlgorithm.LINE_GRADIENT_DESCENT,
- OptimizationAlgorithm.CONJUGATE_GRADIENT, OptimizationAlgorithm.LBFGS};
-
- for (OptimizationAlgorithm oa : optimizationAlgorithms) {
-            Map<Integer, Double> momentumSchedule = new HashMap<>();
- double m = 0.001;
- for (int i = 0; i <= 100; i++) {
- momentumSchedule.put(i, Math.min(m, 0.9999));
- m += 0.001;
- }
-
-            Map<Integer, Double> learningRateSchedule = new HashMap<>();
- double lr = 0.1;
- for (int i = 0; i <= 100; i++) {
- learningRateSchedule.put(i, lr);
- lr *= 0.96;
- }
-
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().optimizationAlgo(oa).iterations(1)
- .learningRateDecayPolicy(LearningRatePolicy.Schedule)
- .learningRateSchedule(learningRateSchedule)
- .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).weightInit(WeightInit.XAVIER)
- .momentum(0.9).momentumAfter(momentumSchedule).regularization(true).l2(0.0001).list()
- .layer(0, new DenseLayer.Builder().nIn(784).nOut(10).build())
- .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).build()).pretrain(false).backprop(true)
- .build();
-
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
-
- int last_layer_index = 1;
-
- DataSetIterator trainIter = new MnistDataSetIterator(64, true, 12345);
-
-
- int count = 0;
- while (trainIter.hasNext()) {
- net.fit(trainIter.next());
-
- // always print the same number (0.1 and 0.9)
- double lrLastLayer = (net.getLayer(last_layer_index)).conf().getLearningRateByParam("W");
- double mLastLayer = ((BaseLayer) (net.getLayer(last_layer_index)).conf().getLayer()).getMomentum();
-
- assertEquals(learningRateSchedule.get(count), lrLastLayer, 1e-6);
- assertEquals(momentumSchedule.get(count), mLastLayer, 1e-6);
-
- if (count++ >= 100)
- break;
- }
- }
- }
-
- ///// Updater Calculations
-
- public double testSGDComputation(Gradient gradientActual, Gradient gradientExpected, double lr,
-                    Map<Integer, Double> learningRateAfter, int i) {
-        for (Map.Entry<String, INDArray> entry : gradientExpected.gradientForVariable().entrySet()) {
- if (learningRateAfter != null)
- lr = (learningRateAfter.containsKey(i)) ? learningRateAfter.get(i) : lr;
- key = entry.getKey();
- val = entry.getValue();
- gradExpected = val.mul(lr);
- gradientExpected.setGradientFor(key, gradExpected);
- INDArray act = gradientActual.getGradientFor(key);
- assertEquals(gradExpected, act);
- }
- return lr;
- }
-
- public double testNesterovsComputation(Gradient gradientActual, Gradient gradientExpected, double lr, double mu,
-                    Map<Integer, Double> momentumAfter, int i) {
-
-        for (Map.Entry<String, INDArray> entry : gradientExpected.gradientForVariable().entrySet()) {
- if (momentumAfter != null)
- mu = (momentumAfter.containsKey(i)) ? momentumAfter.get(i) : mu;
- key = entry.getKey();
- val = entry.getValue();
- INDArray vTmp = tmpStorage.get(key);
-
- if (vTmp == null)
- vTmp = Nd4j.zeros(val.shape());
- vPrev = vTmp;
- vTmp = vPrev.mul(mu).subi(val.mul(lr));
- gradExpected = vPrev.muli(mu).addi(vTmp.mul(-mu - 1));
- gradientExpected.setGradientFor(key, gradExpected);
-
- INDArray act = gradientActual.getGradientFor(entry.getKey());
- assertEquals(gradExpected, act);
- tmpStorage.put(key, vTmp);
- }
- return mu;
- }
-
-
- public double testAdaGradComputation(Gradient gradientActual, Gradient gradientExpected, double lr,
-                    Map<Integer, Double> learningRateAfter, int i) {
-
- double epsilon = AdaGrad.DEFAULT_ADAGRAD_EPSILON;
-
-        for (Map.Entry<String, INDArray> entry : gradientExpected.gradientForVariable().entrySet()) {
- if (learningRateAfter != null)
- lr = (learningRateAfter.containsKey(i)) ? learningRateAfter.get(i) : lr;
- key = entry.getKey();
- val = entry.getValue();
- INDArray historicalGradient = tmpStorage.get(key);
-
- if (historicalGradient == null)
- historicalGradient = val.mul(val);
- else
- historicalGradient.addi(val.mul(val));
-
- gradExpected = Transforms.sqrt(historicalGradient.add(epsilon)).rdiv(lr).mul(val);
- assertEquals(gradExpected, gradientActual.getGradientFor(key));
- gradientExpected.setGradientFor(key, gradExpected);
- tmpStorage.put(key, historicalGradient);
- }
-
- return lr;
- }
-
- public double testAdamComputation(Gradient gradientActual, Gradient gradientExpected, double lr,
-                    Map<Integer, Double> learningRateAfter, int i) {
- double beta1 = 0.9;
- double beta2 = 0.999;
- double epsilon = Adam.DEFAULT_ADAM_EPSILON;
-
-        for (Map.Entry<String, INDArray> entry : gradientExpected.gradientForVariable().entrySet()) {
- if (learningRateAfter != null)
- lr = (learningRateAfter.containsKey(i)) ? learningRateAfter.get(i) : lr;
- key = entry.getKey();
- val = entry.getValue();
-
- INDArray mTmp = tmpStorage2.get(key);
- INDArray vTmp = tmpStorage3.get(key);
-
- if (mTmp == null)
- mTmp = Nd4j.zeros(val.shape());
- if (vTmp == null)
- vTmp = Nd4j.zeros(val.shape());
-
- mTmp.muli(beta1).addi(val.mul(1.0 - beta1));
- vTmp.muli(beta2).addi(val.mul(val).mul(1.0 - beta2));
-
- double beta1t = FastMath.pow(beta1, i + 1);
- double beta2t = FastMath.pow(beta2, i + 1);
- double alphat = lr * FastMath.sqrt(1 - beta2t) / (1 - beta1t);
- if (Double.isNaN(alphat) || alphat == 0.0)
- alphat = epsilon;
-
- gradExpected = mTmp.mul(alphat).divi(Transforms.sqrt(vTmp).addi(epsilon));
- gradientExpected.setGradientFor(key, gradExpected);
- assertEquals(gradExpected, gradientActual.getGradientFor(key));
-
- tmpStorage2.put(key, mTmp);
- tmpStorage3.put(key, vTmp);
- }
- return lr;
- }
-
- public double testAdaMaxComputation(Gradient gradientActual, Gradient gradientExpected, double lr,
-                    Map<Integer, Double> learningRateAfter, int i) {
-
- double beta1 = 0.9;
- double beta2 = 0.999;
-
-        for (Map.Entry<String, INDArray> entry : gradientExpected.gradientForVariable().entrySet()) {
- if (learningRateAfter != null)
- lr = (learningRateAfter.containsKey(i)) ? learningRateAfter.get(i) : lr;
- key = entry.getKey();
- val = entry.getValue();
-
- INDArray mTmp = tmpStorage2.get(key);
- INDArray uTmp = tmpStorage3.get(key);
-
- if (mTmp == null)
- mTmp = Nd4j.zeros(val.shape());
- if (uTmp == null)
- uTmp = Nd4j.zeros(val.shape());
-
- mTmp.muli(beta1).addi(val.mul(1.0 - beta1));
- uTmp.assign(Transforms.max(uTmp.mul(beta2), Transforms.abs(val)));
-
- double beta1t = FastMath.pow(beta1, i + 1);
- double alphat = lr / (1 - beta1t);
- if (Double.isNaN(alphat) || alphat == 0.0)
- alphat = epsilon;
-
- gradExpected = mTmp.mul(alphat).divi(uTmp);
- gradientExpected.setGradientFor(key, gradExpected);
- assertEquals(gradExpected, gradientActual.getGradientFor(key));
-
- tmpStorage2.put(key, mTmp);
- tmpStorage3.put(key, uTmp);
- }
- return lr;
- }
-
- public double testRMSPropComputation(Gradient gradientActual, Gradient gradientExpected, double lr,
-                    Map<Integer, Double> learningRateAfter, int i) {
- double rmsDecay = RmsProp.DEFAULT_RMSPROP_RMSDECAY;
- double epsilon = RmsProp.DEFAULT_RMSPROP_EPSILON;
-
-        for (Map.Entry<String, INDArray> entry : gradientExpected.gradientForVariable().entrySet()) {
- if (learningRateAfter != null)
- lr = (learningRateAfter.containsKey(i)) ? learningRateAfter.get(i) : lr;
- key = entry.getKey();
- val = entry.getValue();
- INDArray lastGTmp = tmpStorage4.get(key);
-
- if (lastGTmp == null)
- lastGTmp = Nd4j.valueArrayOf(val.shape(), epsilon);
-
- lastGTmp.muli(rmsDecay).addi(val.mul(val).muli(1 - rmsDecay));
- gradExpected = val.mul(lr).div(Transforms.sqrt(lastGTmp.add(epsilon)));
- gradientExpected.setGradientFor(key, gradExpected);
-
- assertEquals(gradExpected, gradientActual.getGradientFor(key));
- tmpStorage4.put(key, lastGTmp);
- }
-
- return lr;
- }
-
- ///// Learning Rate Decay Policy Calculations
-
- public double calcExponentialDecay(double lr, double decayRate, double iteration) {
- return lr * Math.pow(decayRate, iteration);
- }
-
- public double calcInverseDecay(double lr, double decayRate, double iteration, double power) {
- return lr / Math.pow((1 + decayRate * iteration), power);
- }
-
- public double calcStepDecay(double lr, double decayRate, double iteration, double steps) {
- return lr * Math.pow(decayRate, Math.floor(iteration / steps));
- }
-
- public double calcTorchStepDecay(double lr, double decayRate) {
- return lr * decayRate;
- }
-
- public double calcPolyDecay(double lr, double iteration, double power, double maxIterations) {
- return lr * Math.pow((1 - iteration / maxIterations), power);
- }
-
- public double calcSigmoidDecay(double lr, double decayRate, double iteration, double steps) {
- return lr / (1 + Math.exp(-decayRate * (iteration - steps)));
- }
-
-}
-
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java
index 27d3c2e0b99b..337d8726ec5e 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java
@@ -13,6 +13,7 @@
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.learning.config.NoOp;
import static org.junit.Assert.*;
@@ -24,7 +25,7 @@ public void testRenormalizatonPerLayer() {
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.layer(new DenseLayer.Builder().nIn(10).nOut(20)
- .updater(org.deeplearning4j.nn.conf.Updater.NONE)
+ .updater(new NoOp())
.gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).build())
.build();
@@ -43,7 +44,7 @@ public void testRenormalizatonPerLayer() {
gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);
Updater updater = UpdaterCreator.getUpdater(layer);
- updater.update(layer, gradient, 0, 1);
+ updater.update(layer, gradient, 0, 0, 1);
assertNotEquals(weightGradCopy, weightGrad);
assertNotEquals(biasGradCopy, biasGrad);
@@ -70,7 +71,7 @@ public void testRenormalizationPerParamType() {
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.layer(new DenseLayer.Builder().nIn(10).nOut(20)
- .updater(org.deeplearning4j.nn.conf.Updater.NONE)
+ .updater(new NoOp())
.gradientNormalization(GradientNormalization.RenormalizeL2PerParamType).build())
.build();
@@ -87,7 +88,7 @@ public void testRenormalizationPerParamType() {
gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);
- updater.update(layer, gradient, 0, 1);
+ updater.update(layer, gradient, 0, 0, 1);
INDArray normWeightsExpected = weightGradCopy.div(weightGradCopy.norm2Number());
INDArray normBiasExpected = biasGradCopy.div(biasGradCopy.norm2Number());
@@ -102,7 +103,7 @@ public void testAbsValueClippingPerElement() {
double threshold = 3;
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer(
- new DenseLayer.Builder().nIn(10).nOut(20).updater(org.deeplearning4j.nn.conf.Updater.NONE)
+ new DenseLayer.Builder().nIn(10).nOut(20).updater(new NoOp())
.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
.gradientNormalizationThreshold(threshold).build())
.build();
@@ -122,7 +123,7 @@ public void testAbsValueClippingPerElement() {
gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);
Updater updater = UpdaterCreator.getUpdater(layer);
- updater.update(layer, gradient, 0, 1);
+ updater.update(layer, gradient, 0, 0, 1);
assertNotEquals(weightGradCopy, weightGrad);
assertNotEquals(biasGradCopy, biasGrad);
@@ -158,7 +159,7 @@ public void testL2ClippingPerLayer() {
//t=1: large -> clipping
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer(
- new DenseLayer.Builder().nIn(10).nOut(20).updater(org.deeplearning4j.nn.conf.Updater.NONE)
+ new DenseLayer.Builder().nIn(10).nOut(20).updater(new NoOp())
.gradientNormalization(GradientNormalization.ClipL2PerLayer)
.gradientNormalizationThreshold(threshold).build())
.build();
@@ -185,7 +186,7 @@ public void testL2ClippingPerLayer() {
assertTrue(layerGradL2 > threshold);
Updater updater = UpdaterCreator.getUpdater(layer);
- updater.update(layer, gradient, 0, 1);
+ updater.update(layer, gradient, 0, 0, 1);
if (t == 0) {
//norm2 < threshold -> no change
@@ -213,7 +214,7 @@ public void testL2ClippingPerParamType() {
double threshold = 3;
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer(
- new DenseLayer.Builder().nIn(10).nOut(20).updater(org.deeplearning4j.nn.conf.Updater.NONE)
+ new DenseLayer.Builder().nIn(10).nOut(20).updater(new NoOp())
.gradientNormalization(GradientNormalization.ClipL2PerParamType)
.gradientNormalizationThreshold(threshold).build())
.build();
@@ -236,7 +237,7 @@ public void testL2ClippingPerParamType() {
assertTrue(weightL2 < threshold);
assertTrue(biasL2 > threshold);
- updater.update(layer, gradient, 0, 1);
+ updater.update(layer, gradient, 0, 0, 1);
assertEquals(weightGradCopy, weightGrad); //weight norm2 < threshold -> no change
assertNotEquals(biasGradCopy, biasGrad); //bias norm2 > threshold -> rescale
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java
index 67dffe949b42..8495b491ba5d 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java
@@ -65,10 +65,10 @@ public void testAdaDeltaUpdate() {
double rho = 0.85;
- NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().rho(rho)
+ NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
- .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA)
- .epsilon(Nd4j.EPS_THRESHOLD).build())
+ .updater(new AdaDelta(rho, Nd4j.EPS_THRESHOLD))
+ .build())
.build();
int numParams = conf.getLayer().initializer().numParams(conf);
@@ -89,7 +89,7 @@ public void testAdaDeltaUpdate() {
int count = 0;
for (int i = 0; i < 2; i++) {
- updater.update(layer, gradient, i, 1);
+ updater.update(layer, gradient, i, 0, 1);
// calculations for one iteration / update
@@ -121,7 +121,7 @@ public void testAdaDeltaUpdate() {
msdx.put(key, msdxTmp);
count++;
}
- assertEquals(rho, layer.layerConf().getRho(), 1e-4);
+ assertEquals(rho, ((AdaDelta)layer.layerConf().getIUpdater()).getRho(), 1e-4);
}
assertEquals(4, count);
@@ -133,9 +133,8 @@ public void testAdaGradUpdater() {
double epsilon = AdaGrad.DEFAULT_ADAGRAD_EPSILON;
NeuralNetConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(lr)
- .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
- .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
+ new NeuralNetConfiguration.Builder().updater(new AdaGrad(lr))
+ .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build())
.build();
int numParams = conf.getLayer().initializer().numParams(conf);
@@ -154,7 +153,7 @@ public void testAdaGradUpdater() {
gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wg);
gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.BIAS_KEY, bg);
- updater.update(layer, gradient, -1, 1);
+ updater.update(layer, gradient, -1, 0, 1);
int count = 0;
        for (Map.Entry<String, INDArray> entry : gradientCopyPreUpdate.gradientForVariable().entrySet()) {
@@ -163,7 +162,7 @@ public void testAdaGradUpdater() {
assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
count++;
}
- assertEquals(lr, layer.layerConf().getLearningRate(), 1e-4);
+ assertEquals(lr, ((AdaGrad)layer.layerConf().getIUpdater()).getLearningRate(), 1e-4);
assertEquals(2, count);
}
@@ -177,9 +176,8 @@ public void testAdamUpdater() {
double beta2 = 0.888;
double epsilon = Adam.DEFAULT_ADAM_EPSILON;
- NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
- .iterations(iteration).adamMeanDecay(beta1).adamVarDecay(beta2).layer(new DenseLayer.Builder()
- .nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.ADAM).build())
+        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Adam(lr, beta1, beta2, epsilon))
+ .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build())
.build();
int numParams = conf.getLayer().initializer().numParams(conf);
@@ -191,7 +189,7 @@ public void testAdamUpdater() {
INDArray updaterState = Nd4j.create(1, updaterStateSize);
updater.setStateViewArray(layer, updaterState, true);
- updater.update(layer, gradient, iteration, 1);
+ updater.update(layer, gradient, iteration, 0, 1);
double beta1t = FastMath.pow(beta1, iteration + 1);
double beta2t = FastMath.pow(beta2, iteration + 1);
@@ -223,8 +221,8 @@ public void testAdamUpdater() {
count++;
}
- assertEquals(beta1, layer.layerConf().getAdamMeanDecay(), 1e-4);
- assertEquals(beta2, layer.layerConf().getAdamVarDecay(), 1e-4);
+ assertEquals(beta1, ((Adam)layer.layerConf().getIUpdater()).getBeta1(), 1e-4);
+ assertEquals(beta2, ((Adam)layer.layerConf().getIUpdater()).getBeta2(), 1e-4);
assertEquals(2, count);
}
@@ -238,12 +236,12 @@ public void testNadamUpdater() {
double epsilon = Nadam.DEFAULT_NADAM_EPSILON;
NeuralNetConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(lr).iterations(iteration)
- .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
- .updater(new Nadam.Builder().learningRate(lr).beta1(beta1)
- .beta2(beta2).epsilon(epsilon).build())
- .build())
- .build();
+ new NeuralNetConfiguration.Builder().iterations(iteration)
+ .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
+ .updater(new Nadam.Builder().learningRate(lr).beta1(beta1)
+ .beta2(beta2).epsilon(epsilon).build())
+ .build())
+ .build();
int numParams = conf.getLayer().initializer().numParams(conf);
INDArray params = Nd4j.create(1, numParams);
@@ -258,7 +256,7 @@ public void testNadamUpdater() {
/*
* Making update for layer
* */
- updater.update(layer, gradient, iteration, 1);
+        updater.update(layer, gradient, iteration, 0, 1);
double beta1t = FastMath.pow(beta1, iteration + 1);
@@ -330,9 +328,9 @@ public void testAdaMaxUpdater() {
double beta2 = 0.888;
double epsilon = AdaMax.DEFAULT_ADAMAX_EPSILON;
- NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
- .iterations(iteration).adamMeanDecay(beta1).adamVarDecay(beta2).layer(new DenseLayer.Builder()
- .nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.ADAMAX).build())
+ NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
+                        .updater(new AdaMax(lr, beta1, beta2, epsilon))
+ .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build())
.build();
int numParams = conf.getLayer().initializer().numParams(conf);
@@ -344,7 +342,7 @@ public void testAdaMaxUpdater() {
INDArray updaterState = Nd4j.create(1, updaterStateSize);
updater.setStateViewArray(layer, updaterState, true);
- updater.update(layer, gradient, iteration, 1);
+ updater.update(layer, gradient, iteration, 0, 1);
double beta1t = FastMath.pow(beta1, iteration + 1);
double beta2t = FastMath.pow(beta2, iteration + 1);
@@ -376,8 +374,8 @@ public void testAdaMaxUpdater() {
count++;
}
- assertEquals(beta1, layer.layerConf().getAdamMeanDecay(), 1e-4);
- assertEquals(beta2, layer.layerConf().getAdamVarDecay(), 1e-4);
+ assertEquals(beta1, ((AdaMax)layer.layerConf().getIUpdater()).getBeta1(), 1e-4);
+ assertEquals(beta2, ((AdaMax)layer.layerConf().getIUpdater()).getBeta2(), 1e-4);
assertEquals(2, count);
}
@@ -387,9 +385,8 @@ public void testNestorovsUpdater() {
double mu = 0.6;
NeuralNetConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(lr).momentum(mu)
- .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
- .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
+ new NeuralNetConfiguration.Builder().updater(new Nesterovs(lr, mu))
+ .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build())
.build();
int numParams = conf.getLayer().initializer().numParams(conf);
@@ -408,7 +405,7 @@ public void testNestorovsUpdater() {
gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wg);
gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.BIAS_KEY, bg);
- updater.update(layer, gradient, -1, 1);
+ updater.update(layer, gradient, -1, 0, 1);
int count = 0;
        for (Map.Entry<String, INDArray> entry : gradientCopyPreUpdate.gradientForVariable().entrySet()) {
@@ -422,7 +419,7 @@ public void testNestorovsUpdater() {
count++;
}
- assertEquals(mu, layer.layerConf().getMomentum(), 1e-4);
+ assertEquals(mu, ((Nesterovs)layer.layerConf().getIUpdater()).getMomentum(), 1e-4);
assertEquals(2, count);
}
@@ -435,9 +432,8 @@ public void testRMSPropUpdater() {
NeuralNetConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(lr).rmsDecay(rmsDecay)
- .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
- .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP).build())
+                        new NeuralNetConfiguration.Builder().updater(new RmsProp(lr, rmsDecay, RmsProp.DEFAULT_RMSPROP_EPSILON))
+ .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build())
.build();
int numParams = conf.getLayer().initializer().numParams(conf);
@@ -457,7 +453,7 @@ public void testRMSPropUpdater() {
gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wg);
gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.BIAS_KEY, bg);
- updater.update(layer, gradient, -1, 1);
+ updater.update(layer, gradient, -1, 0, 1);
double epsilon = 1e-8;
@@ -475,7 +471,7 @@ public void testRMSPropUpdater() {
assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
lastG.put(key, lastGTmp);
}
- assertEquals(rmsDecay, layer.layerConf().getRmsDecay(), 1e-4);
+ assertEquals(rmsDecay, ((RmsProp)layer.layerConf().getIUpdater()).getRmsDecay(), 1e-4);
}
@Test
@@ -483,9 +479,8 @@ public void testSGDUpdater() {
double lr = 0.05;
NeuralNetConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(lr)
- .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
+ new NeuralNetConfiguration.Builder().updater(new Sgd(lr))
+ .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build())
.build();
int numParams = conf.getLayer().initializer().numParams(conf);
@@ -501,14 +496,14 @@ public void testSGDUpdater() {
gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wg);
gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.BIAS_KEY, bg);
- updater.update(layer, gradient, -1, 1);
+ updater.update(layer, gradient, -1, 0, 1);
        for (Map.Entry<String, INDArray> entry : gradientCopyPreUpdate.gradientForVariable().entrySet()) {
val = entry.getValue();
gradExpected = val.mul(lr);
assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
}
- assertEquals(lr, layer.layerConf().getLearningRate(), 1e-4);
+ assertEquals(lr, ((Sgd)layer.layerConf().getIUpdater()).getLearningRate(), 1e-4);
}
@@ -518,9 +513,8 @@ public void testNoOpUpdater() {
double lr = 0.5;
NeuralNetConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(lr)
- .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
- .updater(org.deeplearning4j.nn.conf.Updater.NONE).build())
+ new NeuralNetConfiguration.Builder().updater(new NoOp())
+ .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build())
.build();
int numParams = conf.getLayer().initializer().numParams(conf);
@@ -540,7 +534,7 @@ public void testNoOpUpdater() {
gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, wg);
gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, bg);
- updater.update(layer, gradient, -1, 1);
+ updater.update(layer, gradient, -1, 0, 1);
INDArray weightGradActual = gradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY);
INDArray biasGradActual = gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY);
@@ -555,15 +549,14 @@ public void testMultiLayerUpdater() throws Exception {
Nd4j.getRandom().setSeed(12345L);
double lr = 0.03;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(0.6).list()
- .layer(0, new DenseLayer.Builder().nIn(4).nOut(5)
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list()
+ .layer(0, new DenseLayer.Builder().nIn(4).nOut(5).updater(new Sgd(lr)).build())
.layer(1, new DenseLayer.Builder().nIn(5).nOut(6)
- .updater(org.deeplearning4j.nn.conf.Updater.NONE).build())
+ .updater(new NoOp()).build())
.layer(2, new DenseLayer.Builder().nIn(6).nOut(7)
- .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
+ .updater(new AdaGrad(lr)).build())
.layer(3, new OutputLayer.Builder().nIn(7).nOut(8)
- .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS)
+ .updater(new Nesterovs(lr, 0.6))
.activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE)
.build())
.build();
@@ -636,13 +629,13 @@ public void testMultiLayerUpdater() throws Exception {
layerGradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wGrad.dup());
layerGradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, bGrad.dup());
- uArr[j].getConfig().applySchedules(0, net.getLayer(j).conf().getLearningRateByParam("W"));
+// uArr[j].getConfig().applySchedules(0, net.getLayer(j).conf().getLearningRateByParam("W"));
for (String s : layerGradient.gradientForVariable().keySet()) {
expectedGradient.put(j + "_" + s, layerGradient.getGradientFor(s));
}
}
- updater.update(net, gradient, i, 1);
+ updater.update(net, gradient, i, 0, 1);
assertEquals(gradient.gradientForVariable(), expectedGradient);
}
}
@@ -657,11 +650,11 @@ public void testSetGetUpdater() {
int nIn = 4;
int nOut = 8;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(0.6).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(lr, 0.6)).list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5)
.updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
.layer(1, new DenseLayer.Builder().nIn(5).nOut(6)
- .updater(org.deeplearning4j.nn.conf.Updater.NONE).build())
+ .updater(new NoOp()).build())
.layer(2, new DenseLayer.Builder().nIn(6).nOut(7)
.updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
.layer(3, new OutputLayer.Builder().nIn(7).nOut(nOut)
@@ -688,11 +681,11 @@ public void testSetGetUpdater2() {
int nIn = 4;
int nOut = 8;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(0.6).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(lr, 0.6)).list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5)
.updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
.layer(1, new DenseLayer.Builder().nIn(5).nOut(6)
- .updater(org.deeplearning4j.nn.conf.Updater.NONE).build())
+ .updater(new NoOp()).build())
.layer(2, new DenseLayer.Builder().nIn(6).nOut(7)
.updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
.layer(3, new OutputLayer.Builder().nIn(7).nOut(nOut)
@@ -707,67 +700,6 @@ public void testSetGetUpdater2() {
assertTrue(newUpdater == net.getUpdater()); //Should be identical object
}
-
- @Test
- public void testEpsilon() {
- //Test epsilon setting - adagrad
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).epsilon(0.123).build())
- .layer(2, new OutputLayer.Builder().nIn(2).nOut(2).epsilon(0.456).build()).build();
-
- assertEquals(1e-6, ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(0).getLayer()).getEpsilon(),
- 0.0);
- assertEquals(0.123, ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(1).getLayer()).getEpsilon(),
- 0.0);
- assertEquals(0.456, ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(2).getLayer()).getEpsilon(),
- 0.0);
-
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
- // net.fit(Nd4j.create(1,2), Nd4j.create(1,2));
- MultiLayerUpdater updater = (MultiLayerUpdater) net.getUpdater();
- List l = updater.getUpdaterBlocks();
-
- AdaGrad adaGrad = (AdaGrad) l.get(0).getGradientUpdater().getConfig();
- assertEquals(1e-6, adaGrad.getEpsilon(), 0.0);
-
- AdaGrad adaGrad1 = (AdaGrad) l.get(1).getGradientUpdater().getConfig();
- assertEquals(0.123, adaGrad1.getEpsilon(), 0.0);
-
- AdaGrad adaGrad2 = (AdaGrad) l.get(2).getGradientUpdater().getConfig();
- assertEquals(0.456, adaGrad2.getEpsilon(), 0.0);
-
-
- //Test epsilon setting - adadelta
- conf = new NeuralNetConfiguration.Builder().updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).epsilon(0.123).build())
- .layer(2, new OutputLayer.Builder().nIn(2).nOut(2).epsilon(0.456).build()).build();
-
- assertEquals(1e-6, ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(0).getLayer()).getEpsilon(),
- 0.0);
- assertEquals(0.123, ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(1).getLayer()).getEpsilon(),
- 0.0);
- assertEquals(0.456, ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(2).getLayer()).getEpsilon(),
- 0.0);
-
- net = new MultiLayerNetwork(conf);
- net.init();
- updater = (MultiLayerUpdater) net.getUpdater();
- l = updater.getUpdaterBlocks();
-
- AdaDelta adaDelta = (AdaDelta) l.get(0).getGradientUpdater().getConfig();
- assertEquals(1e-6, adaDelta.getEpsilon(), 0.0);
-
- AdaDelta adaDelta1 = (AdaDelta) l.get(1).getGradientUpdater().getConfig();
- assertEquals(0.123, adaDelta1.getEpsilon(), 0.0);
-
- AdaDelta adaDelta2 = (AdaDelta) l.get(2).getGradientUpdater().getConfig();
- assertEquals(0.456, adaDelta2.getEpsilon(), 0.0);
- }
-
@Test
public void testPretrain() {
@@ -786,8 +718,7 @@ public void testPretrain() {
gradient.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient);
- NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).seed(42)
- .updater(org.deeplearning4j.nn.conf.Updater.SGD)
+ NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(lr)).seed(42)
.layer(new org.deeplearning4j.nn.conf.layers.RBM.Builder()
.lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY)
.activation(Activation.IDENTITY).nIn(nIn).nOut(nOut).build())
@@ -808,14 +739,14 @@ public void testPretrain() {
gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.BIAS_KEY, bg);
gradientCopyPreUpdate.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbg);
- updater.update(layer, gradient, -1, 1);
+ updater.update(layer, gradient, -1, 0, 1);
for (Map.Entry entry : gradientCopyPreUpdate.gradientForVariable().entrySet()) {
val = entry.getValue();
gradExpected = val.mul(lr);
assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
}
- assertEquals(lr, layer.layerConf().getLearningRate(), 1e-4);
+ assertEquals(lr, ((Sgd)layer.layerConf().getIUpdater()).getLearningRate(), 1e-4);
//Test with pretrain == false
@@ -845,7 +776,7 @@ public void testPretrain() {
layer.setBackpropGradientsViewArray(gradients);
updater = UpdaterCreator.getUpdater(layer);
- updater.update(layer, gradient, -1, 1);
+ updater.update(layer, gradient, -1, 0, 1);
for (Map.Entry entry : gradientCopyPreUpdate.gradientForVariable().entrySet()) {
// System.out.println(entry.getKey());
@@ -859,49 +790,7 @@ public void testPretrain() {
// System.out.println(gradExpected + "\t" + gradient.getGradientFor(entry.getKey()));
assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
}
- assertEquals(lr, layer.layerConf().getLearningRate(), 1e-4);
- }
-
- @Test
- public void testEpsilonAllUpdaters() {
-
- double e = 7e-2;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().epsilon(e).list()
- .layer(0, new DenseLayer.Builder().nIn(2).nOut(2)
- .updater(org.deeplearning4j.nn.conf.Updater.ADAM).build())
- .layer(1, new DenseLayer.Builder().nIn(2).nOut(2)
- .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP).build())
- .layer(2, new DenseLayer.Builder().nIn(2).nOut(2)
- .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).build())
- .layer(3, new DenseLayer.Builder().nIn(2).nOut(2)
- .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
- .layer(4, new OutputLayer.Builder().nIn(2).nOut(2)
- .updater(org.deeplearning4j.nn.conf.Updater.ADAMAX).build())
- .build();
-
- MultiLayerNetwork net = new MultiLayerNetwork(conf);
- net.init();
-
- net.fit(Nd4j.create(1, 2), Nd4j.create(1, 2));
-
-
- MultiLayerUpdater updater = (MultiLayerUpdater) net.getUpdater();
- List l = updater.getUpdaterBlocks();
-
- Adam adam = (Adam) l.get(0).getGradientUpdater().getConfig(); //u0.updaterForVariable.get("W");
- assertEquals(e, adam.getEpsilon(), 0.0);
-
- RmsProp rmsProp = (RmsProp) l.get(1).getGradientUpdater().getConfig(); //u1.updaterForVariable.get("W");
- assertEquals(e, rmsProp.getEpsilon(), 0.0);
-
- AdaDelta adaDelta = (AdaDelta) l.get(2).getGradientUpdater().getConfig(); //u2.updaterForVariable.get("W");
- assertEquals(e, adaDelta.getEpsilon(), 0.0);
-
- AdaGrad adaGrad = (AdaGrad) l.get(3).getGradientUpdater().getConfig(); //u3.updaterForVariable.get("W");
- assertEquals(e, adaGrad.getEpsilon(), 0.0);
-
- AdaMax adaMax = (AdaMax) l.get(4).getGradientUpdater().getConfig(); //u3.updaterForVariable.get("W");
- assertEquals(e, adaMax.getEpsilon(), 0.0);
+ assertEquals(lr, ((Sgd)layer.layerConf().getIUpdater()).getLearningRate(), 1e-4);
}
@Test
@@ -910,18 +799,18 @@ public void testUpdaterBlockMlnAndCG() {
List blocks;
if (i == 0) {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.5).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list()
.layer(0, new DenseLayer.Builder().nIn(10).nOut(10).name("l0")
- .updater(org.deeplearning4j.nn.conf.Updater.ADAM).build())
+ .updater(new Adam(0.5)).build())
.layer(1, new DenseLayer.Builder().nIn(10).nOut(10).name("l1")
- .updater(org.deeplearning4j.nn.conf.Updater.ADAM).biasLearningRate(0.25)
+ .updater(new Adam(0.5)).biasUpdater(new Adam(0.25))
.build())
.layer(2, new DenseLayer.Builder().nIn(10).nOut(10).name("l2")
- .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).build())
+ .updater(new AdaDelta()).build())
.layer(3, new DenseLayer.Builder().nIn(10).nOut(10).name("l3")
- .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
+ .updater(new AdaGrad(0.5)).build())
.layer(4, new OutputLayer.Builder().nIn(10).nOut(10).name("l4")
- .updater(org.deeplearning4j.nn.conf.Updater.ADAMAX).build())
+ .updater(new AdaMax(0.5)).build())
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@@ -930,19 +819,19 @@ public void testUpdaterBlockMlnAndCG() {
MultiLayerUpdater u = (MultiLayerUpdater) net.getUpdater();
blocks = u.getUpdaterBlocks();
} else {
- ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.5)
+ ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
.graphBuilder().addInputs("in")
.addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10)
- .updater(org.deeplearning4j.nn.conf.Updater.ADAM).build(), "in")
+ .updater(new Adam(0.5)).build(), "in")
.addLayer("l1", new DenseLayer.Builder().nIn(10).nOut(10)
- .updater(org.deeplearning4j.nn.conf.Updater.ADAM).biasLearningRate(0.25)
+ .updater(new Adam(0.5)).biasUpdater(new Adam(0.25))
.build(), "l0")
.addLayer("l2", new DenseLayer.Builder().nIn(10).nOut(10)
- .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).build(), "l1")
+ .updater(new AdaDelta()).build(), "l1")
.addLayer("l3", new DenseLayer.Builder().nIn(10).nOut(10)
- .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build(), "l2")
+ .updater(new AdaGrad(0.5)).build(), "l2")
.addLayer("l4", new OutputLayer.Builder().nIn(10).nOut(10)
- .updater(org.deeplearning4j.nn.conf.Updater.ADAMAX).build(), "l3")
+ .updater(new AdaMax(0.5)).build(), "l3")
.setOutputs("l4").build();
ComputationGraph net = new ComputationGraph(conf);
@@ -1043,8 +932,7 @@ public void testUpdaterBlockVae() {
List blocks;
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(0.5)
- .updater(org.deeplearning4j.nn.conf.Updater.ADAM).list()
+ new NeuralNetConfiguration.Builder().updater(new Adam(0.5)).list()
.layer(0, new VariationalAutoencoder.Builder().nIn(8).nOut(12)
.encoderLayerSizes(10, 11).decoderLayerSizes(13, 14).build())
.build();
@@ -1078,86 +966,4 @@ public void testUpdaterBlockVae() {
}
assertEquals(expParams, actParams);
}
-
-
- @Test
- public void testUpdaterConfigDeprecatedMethods() {
- //.momentum(), .epsilon() etc - these are now deprecated, but we still want them to work as expected
- // until they are actually removed
-
- double lr = 0.75;
- double eps = 0.65;
- double adamMean = 0.1;
- double adamVar = 0.2;
- double momentum = 0.3;
- Map momentumSchedule = new HashMap<>();
- momentumSchedule.put(0, 0.35);
- momentumSchedule.put(10, 0.34);
- double rmsDecay = 0.4;
-
- for (boolean useEnum : new boolean[] {true, false}) {
- NeuralNetConfiguration.ListBuilder listBuilder = new NeuralNetConfiguration.Builder()
- //Multiple updaters
- .learningRate(lr).epsilon(eps)
- //Adam
- .adamMeanDecay(adamMean).adamVarDecay(adamVar)
- //Momentum
- .momentum(momentum).momentumAfter(momentumSchedule)
- //RMSProp
- .rmsDecay(rmsDecay).list();
- if (useEnum) {
- listBuilder.layer(0,
- new DenseLayer.Builder().nIn(10).nOut(10)
- .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
- .layer(1, new DenseLayer.Builder().nIn(10).nOut(10)
- .updater(org.deeplearning4j.nn.conf.Updater.ADAM).build())
- .layer(2, new DenseLayer.Builder().nIn(10).nOut(10)
- .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).build())
- .layer(3, new DenseLayer.Builder().nIn(10).nOut(10)
- .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
- .layer(4, new DenseLayer.Builder().nIn(10).nOut(10)
- .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
- .layer(5, new DenseLayer.Builder().nIn(10).nOut(10)
- .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP).build());
- } else {
- listBuilder.layer(0, new DenseLayer.Builder().nIn(10).nOut(10).updater(new Sgd()).build())
- .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).updater(new Adam()).build())
- .layer(2, new DenseLayer.Builder().nIn(10).nOut(10).updater(new AdaDelta()).build())
- .layer(3, new DenseLayer.Builder().nIn(10).nOut(10).updater(new Nesterovs()).build())
- .layer(4, new DenseLayer.Builder().nIn(10).nOut(10).updater(new AdaGrad()).build())
- .layer(5, new DenseLayer.Builder().nIn(10).nOut(10).updater(new RmsProp()).build());
- }
-
-
- MultiLayerConfiguration conf = listBuilder.build();
-
- Sgd sgd = (Sgd) ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(0).getLayer()).getIUpdater();
- assertEquals(lr, sgd.getLearningRate(), 1e-6);
-
- Adam adam = (Adam) ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(1).getLayer()).getIUpdater();
- assertEquals(lr, adam.getLearningRate(), 1e-6);
- assertEquals(eps, adam.getEpsilon(), 1e-6);
- assertEquals(adamMean, adam.getBeta1(), 1e-6);
- assertEquals(adamVar, adam.getBeta2(), 1e-6);
-
- //Adadelta: no params
-
- Nesterovs nesterovs = (Nesterovs) ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(3).getLayer())
- .getIUpdater();
- assertEquals(lr, nesterovs.getLearningRate(), 1e-6);
- assertEquals(momentum, nesterovs.getMomentum(), 1e-6);
- assertEquals(momentumSchedule, nesterovs.getMomentumSchedule());
-
- AdaGrad adagrad = (AdaGrad) ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(4).getLayer())
- .getIUpdater();
- assertEquals(lr, adagrad.getLearningRate(), 1e-6);
- assertEquals(eps, adagrad.getEpsilon(), 1e-6);
-
- RmsProp rmsProp = (RmsProp) ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(5).getLayer())
- .getIUpdater();
- assertEquals(lr, rmsProp.getLearningRate(), 1e-6);
- assertEquals(rmsDecay, rmsProp.getRmsDecay(), 1e-6);
- assertEquals(eps, rmsProp.getEpsilon(), 1e-6);
- }
- }
}
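
The changes above all follow one migration, so it is worth stating once: hyperparameters move off the NeuralNetConfiguration builder and the conf.Updater enum into IUpdater value objects from org.nd4j.linalg.learning.config, biasLearningRate(x) becomes biasUpdater(new Adam(x)) (or whichever updater applies), and Updater.update(...) gains an epoch argument. A minimal before/after sketch, built only from calls visible in the hunks above and not meant as a drop-in snippet:

    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.nd4j.linalg.learning.config.Nesterovs;

    // Before: rate and momentum on the builder, algorithm chosen via an enum:
    //   new NeuralNetConfiguration.Builder().learningRate(0.05).momentum(0.6)
    //           .layer(new DenseLayer.Builder().nIn(4).nOut(3)
    //                   .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
    //           .build();

    // After: one IUpdater object carries the algorithm and all its hyperparameters.
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(new Nesterovs(0.05, 0.6))
            .layer(new DenseLayer.Builder().nIn(4).nOut(3).build())
            .build();

    // The update call site gains an epoch parameter:
    //   update(layer, gradient, iteration, epoch, miniBatchSize)
    // hence the repeated "-1, 1" to "-1, 0, 1" changes above.
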
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomGradientUpdater.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomGradientUpdater.java
index 7a7c00594d91..be1f1cd58129 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomGradientUpdater.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomGradientUpdater.java
@@ -23,7 +23,7 @@ public void setStateViewArray(INDArray viewArray, int[] gradientShape, char grad
}
@Override
- public void applyUpdater(INDArray gradient, int iteration) {
+ public void applyUpdater(INDArray gradient, int iteration, int epoch) {
gradient.muli(config.getLearningRate());
}
}
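
A custom GradientUpdater only has to widen applyUpdater to accept the epoch; nothing obliges it to use it. The method as it stands after this hunk:

    @Override
    public void applyUpdater(INDArray gradient, int iteration, int epoch) {
        // Plain learning-rate scaling; iteration and epoch are available to
        // schedule-aware updaters but are deliberately unused here.
        gradient.muli(config.getLearningRate());
    }
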
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomIUpdater.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomIUpdater.java
index 6e9519c8010e..86de505e40b6 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomIUpdater.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomIUpdater.java
@@ -26,11 +26,6 @@ public long stateSize(long numParams) {
return 0;
}
- @Override
- public void applySchedules(int iteration, double newLearningRate) {
- this.learningRate = newLearningRate;
- }
-
@Override
public GradientUpdater instantiate(INDArray viewArray, boolean initializeViewArray) {
if (viewArray != null) {
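
applySchedules(int, double) is dropped from the IUpdater contract: the learning rate is no longer pushed into the updater from outside but is owned by the config object itself, which is why TestCustomUpdater below constructs CustomIUpdater(lr) instead of calling learningRate(lr) on the builder. A sketch of the surviving surface, under that assumption:

    // The rate is fixed at construction time...
    CustomIUpdater updaterConfig = new CustomIUpdater(0.03);
    // ...and instantiate(...) remains the factory for the stateful updater.
    // This IUpdater keeps no state (stateSize(...) returns 0), so no view array:
    GradientUpdater gradientUpdater = updaterConfig.instantiate(null, true);
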
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java
index f0b8a28557c4..83496942360d 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java
@@ -2,7 +2,6 @@
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.BaseLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
@@ -30,16 +29,16 @@ public void testCustomUpdater() {
double lr = 0.03;
Nd4j.getRandom().setSeed(12345);
- MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(12345).learningRate(lr)
- .activation(Activation.TANH).updater(new CustomIUpdater()) //Specify custom IUpdater
+ MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(12345)
+ .activation(Activation.TANH).updater(new CustomIUpdater(lr)) //Specify custom IUpdater
.list().layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
.layer(1, new OutputLayer.Builder().nIn(10).nOut(10)
.lossFunction(LossFunctions.LossFunction.MSE).build())
.build();
Nd4j.getRandom().setSeed(12345);
- MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345).learningRate(lr)
- .activation(Activation.TANH).updater(Updater.SGD).list()
+ MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345)
+ .activation(Activation.TANH).updater(new Sgd(lr)).list()
.layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder()
.nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build())
.build();
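
With the rate inside the config object, a CustomIUpdater that simply scales gradients by its learning rate is behaviourally identical to Sgd at the same rate, which is the equivalence conf1 and conf2 set up. A sketch of what the remainder of the test presumably checks (the assertions sit outside this hunk):

    MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); // uses CustomIUpdater(lr)
    net1.init();
    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); // uses new Sgd(lr)
    net2.init();
    // Same seed, same update rule: after fitting both on the same data,
    // net1.params() and net2.params() should be equal.
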
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java
index 7a533544417a..7ce2185c594e 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java
@@ -4,7 +4,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@@ -21,6 +20,7 @@
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.Collections;
@@ -231,8 +231,7 @@ public void testBackTrackLineHessian() {
private static MultiLayerConfiguration getIrisMultiLayerConfig(Activation activationFunction, int iterations,
OptimizationAlgorithm optimizer) {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().optimizationAlgo(optimizer)
- .iterations(iterations).miniBatch(false).momentum(0.9).learningRate(0.01)
- .updater(Updater.NESTEROVS).seed(12345L).list()
+ .iterations(iterations).miniBatch(false).updater(new Nesterovs(0.01, 0.9)).seed(12345L).list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER)
.activation(activationFunction).build())
.layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
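
One hazard in this migration is worth flagging: the single-argument Nesterovs constructor takes a learning rate, not a momentum, so the old .momentum(0.9) does not translate to new Nesterovs(0.9). The two forms, per the two-argument uses elsewhere in this diff:

    new Nesterovs(0.01, 0.9); // learning rate 0.01, momentum 0.9: the faithful port of
                              // .learningRate(0.01).momentum(0.9).updater(Updater.NESTEROVS)
    new Nesterovs(0.9);       // learning rate 0.9 with the default momentum, NOT a
                              // momentum-only override
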
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java
index 6e297143eef1..26ea59d43f64 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java
@@ -1,6 +1,5 @@
package org.deeplearning4j.optimize.solver;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
@@ -9,7 +8,6 @@
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.RBM;
@@ -36,7 +34,10 @@
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.conditions.Condition;
+import org.nd4j.linalg.learning.config.AdaGrad;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
import java.util.Collection;
@@ -115,15 +116,13 @@ public void testOptimizersMLP() {
private static MultiLayerConfiguration getMLPConfigIris(OptimizationAlgorithm oa, int nIterations) {
MultiLayerConfiguration c = new NeuralNetConfiguration.Builder().optimizationAlgo(oa).iterations(nIterations)
- .learningRate(1e-1).seed(12345L)
+ .updater(new AdaGrad(1e-1)).seed(12345L)
.list().layer(0,
new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER)
- .updater(Updater.ADAGRAD).activation(
- Activation.RELU)
+ .activation(Activation.RELU)
.build())
.layer(1, new OutputLayer.Builder(LossFunction.MCXENT).nIn(3).nOut(3)
- .weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD)
- .activation(Activation.SOFTMAX).build())
+ .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build())
.backprop(true).pretrain(false).build();
return c;
@@ -188,8 +187,8 @@ public void testSphereFnOptHelper(OptimizationAlgorithm oa, int numLineSearchIte
+ nDimensions);
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().maxNumLineSearchIterations(numLineSearchIter)
- .iterations(100).learningRate(1e-2)
- .layer(new RBM.Builder().nIn(1).nOut(1).updater(Updater.SGD).build()).build();
+ .iterations(100).updater(new Sgd(1e-2))
+ .layer(new RBM.Builder().nIn(1).nOut(1).build()).build();
conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here
Random rng = new DefaultRandom(12345L);
@@ -278,8 +277,8 @@ private static void testSphereFnMultipleStepsHelper(OptimizationAlgorithm oa, in
org.nd4j.linalg.api.rng.distribution.Distribution dist =
new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10);
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
- .maxNumLineSearchIterations(maxNumLineSearchIter).iterations(i).learningRate(0.1)
- .layer(new DenseLayer.Builder().nIn(1).nOut(1).updater(Updater.SGD).build()).build();
+ .maxNumLineSearchIterations(maxNumLineSearchIter).iterations(i).updater(new Sgd(0.1))
+ .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here
Model m = new SphereFunctionModel(100, dist, conf);
@@ -348,11 +347,6 @@ public void setBackpropGradientsViewArray(INDArray gradients) {
throw new UnsupportedOperationException();
}
- @Override
- public void applyLearningRateScoreDecay() {
-
- }
-
@Override
public void setCacheMode(CacheMode mode) {
throw new UnsupportedOperationException();
@@ -377,6 +371,11 @@ public void setInput(INDArray input) {
public boolean isPretrainLayer() {
return false;
}
+
+ @Override
+ public void clearNoiseWeightParams() {
+
+ }
}
@@ -412,8 +411,8 @@ private static void testRastriginFnMultipleStepsHelper(OptimizationAlgorithm oa,
for (int i = 0; i <= nOptIter; i++) {
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.maxNumLineSearchIterations(maxNumLineSearchIter).iterations(i).miniBatch(false)
- .learningRate(1e-2)
- .layer(new DenseLayer.Builder().nIn(1).nOut(1).updater(Updater.ADAGRAD).build()).build();
+ .updater(new AdaGrad(1e-2))
+ .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here
Model m = new RastriginFunctionModel(10, conf);
@@ -536,11 +535,6 @@ public void setBackpropGradientsViewArray(INDArray gradients) {
throw new UnsupportedOperationException();
}
- @Override
- public void applyLearningRateScoreDecay() {
-
- }
-
@Override
public void setCacheMode(CacheMode mode) {
@@ -566,6 +560,11 @@ public void setInput(INDArray input) {
public boolean isPretrainLayer() {
return false;
}
+
+ @Override
+ public void clearNoiseWeightParams() {
+
+ }
}
@@ -595,8 +594,9 @@ private static void testRosenbrockFnMultipleStepsHelper(OptimizationAlgorithm oa
for (int i = 0; i <= nOptIter; i++) {
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.maxNumLineSearchIterations(maxNumLineSearchIter).iterations(i)
+ .updater(new Sgd(1e-1))
.stepFunction(new org.deeplearning4j.nn.conf.stepfunctions.NegativeDefaultStepFunction())
- .learningRate(1e-1).layer(new RBM.Builder().nIn(1).nOut(1).updater(Updater.SGD).build())
+ .layer(new RBM.Builder().nIn(1).nOut(1).build())
.build();
conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here
@@ -733,11 +733,6 @@ public void setBackpropGradientsViewArray(INDArray gradients) {
throw new UnsupportedOperationException();
}
- @Override
- public void applyLearningRateScoreDecay() {
-
- }
-
@Override
public void setCacheMode(CacheMode mode) {
@@ -763,6 +758,11 @@ public void setInput(INDArray input) {
public boolean isPretrainLayer() {
return false;
}
+
+ @Override
+ public void clearNoiseWeightParams() {
+
+ }
}
@@ -992,36 +992,11 @@ public Type type() {
throw new UnsupportedOperationException();
}
- @Override
- public Gradient error(INDArray input) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public INDArray derivativeActivation(INDArray input) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray indArray) {
- throw new UnsupportedOperationException();
- }
-
@Override
public Pair backpropGradient(INDArray epsilon) {
throw new UnsupportedOperationException();
}
- @Override
- public void merge(Layer layer, int batchSize) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public INDArray activationMean() {
- throw new UnsupportedOperationException();
- }
-
@Override
public INDArray preOutput(INDArray x) {
throw new UnsupportedOperationException();
@@ -1093,5 +1068,25 @@ public INDArray getGradientsViewArray() {
public void applyConstraints(int iteration, int epoch) {
}
+
+ @Override
+ public int getIterationCount() {
+ return 0;
+ }
+
+ @Override
+ public int getEpochCount() {
+ return 0;
+ }
+
+ @Override
+ public void setIterationCount(int iterationCount) {
+
+ }
+
+ @Override
+ public void setEpochCount(int epochCount) {
+
+ }
}
}
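
The Model/Layer interface churn in this file is mechanical: applyLearningRateScoreDecay(), error(), derivativeActivation(), calcGradient(), merge() and activationMean() are gone, while clearNoiseWeightParams() and the iteration/epoch counters arrive. Any other hand-rolled test Model needs the same minimal stubs, and no-ops suffice here:

    @Override public void clearNoiseWeightParams() { }
    @Override public int getIterationCount() { return 0; }
    @Override public int getEpochCount() { return 0; }
    @Override public void setIterationCount(int iterationCount) { }
    @Override public void setEpochCount(int epochCount) { }
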
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestParamAndGradientIterationListener.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestParamAndGradientIterationListener.java
index 3f70b01511ac..399563ec7b3e 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestParamAndGradientIterationListener.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestParamAndGradientIterationListener.java
@@ -12,6 +12,7 @@
import org.deeplearning4j.optimize.listeners.ParamAndGradientIterationListener;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.io.File;
@@ -24,7 +25,7 @@ public void test() {
IrisDataSetIterator iter = new IrisDataSetIterator(30, 150);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1e-5)
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(1e-5))
.iterations(1).list().layer(0, new DenseLayer.Builder().nIn(4).nOut(20).build())
.layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build())
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/ParallelExistingMiniBatchDataSetIteratorTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/ParallelExistingMiniBatchDataSetIteratorTest.java
index 8d55d3f41093..b099c3ebb5ec 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/ParallelExistingMiniBatchDataSetIteratorTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/ParallelExistingMiniBatchDataSetIteratorTest.java
@@ -2,12 +2,12 @@
import lombok.extern.slf4j.Slf4j;
import org.datavec.api.util.ClassPathResource;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.datasets.iterator.callbacks.DataSetDeserializer;
import org.deeplearning4j.datasets.iterator.parallel.FileSplitParallelDataSetIterator;
import org.junit.Before;
import org.junit.Test;
import org.nd4j.linalg.dataset.DataSet;
+import org.nd4j.linalg.primitives.Pair;
import java.io.File;
import java.util.ArrayList;
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java
index ef7c208fe495..4a097140671b 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java
@@ -1,10 +1,8 @@
package org.deeplearning4j.parallelism;
import org.deeplearning4j.nn.api.Model;
-import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.WorkspaceMode;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
@@ -16,6 +14,7 @@
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.List;
@@ -42,15 +41,10 @@ public void testModelInitialParamsEquality1() throws Exception {
@Override
public void run() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(119).iterations(1) // Training iterations as above
- .regularization(true).l2(0.0005)
- /*
- Uncomment the following for learning decay and bias
- */
- .learningRate(.01)//.biasLearningRate(0.02)
+ .l2(0.0005)
//.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75)
.weightInit(WeightInit.XAVIER)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- .updater(Updater.NESTEROVS).momentum(0.9)
+ .updater(new Nesterovs(0.01, 0.9))
.trainingWorkspaceMode(WorkspaceMode.SINGLE).list()
.layer(0, new ConvolutionLayer.Builder(5, 5)
//nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied
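
.regularization(true) disappears from the builder along with the rate and momentum setters: l1/l2 coefficients now take effect simply by being nonzero. The slimmed-down builder, reusing the values from this hunk (layers elided):

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(119).iterations(1)
            .l2(0.0005)                        // active without .regularization(true)
            .weightInit(WeightInit.XAVIER)
            .updater(new Nesterovs(0.01, 0.9)) // rate and momentum in one object
            .trainingWorkspaceMode(WorkspaceMode.SINGLE)
            .list()
            // ... ConvolutionLayer etc. as above ...
            .build();
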
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java
index 34fbd741e194..d1f4b3d8c00e 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java
@@ -2,8 +2,8 @@
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
@@ -52,9 +52,8 @@ public void regressionTestMLP1() throws Exception {
assertEquals(3, l0.getNIn());
assertEquals(4, l0.getNOut());
assertEquals(WeightInit.XAVIER, l0.getWeightInit());
- assertEquals(Updater.NESTEROVS, l0.getUpdater());
- assertEquals(0.9, l0.getMomentum(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
+ assertEquals(new Nesterovs(0.15, 0.9), l0.getIUpdater());
+ assertEquals(0.15, ((Nesterovs)l0.getIUpdater()).getLearningRate(), 1e-6);
OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer();
assertEquals("softmax", l1.getActivationFn().toString());
@@ -63,9 +62,9 @@ public void regressionTestMLP1() throws Exception {
assertEquals(4, l1.getNIn());
assertEquals(5, l1.getNOut());
assertEquals(WeightInit.XAVIER, l1.getWeightInit());
- assertEquals(Updater.NESTEROVS, l1.getUpdater());
- assertEquals(0.9, l1.getMomentum(), 1e-6);
- assertEquals(0.15, l1.getLearningRate(), 1e-6);
+ assertEquals(new Nesterovs(0.15, 0.9), l1.getIUpdater());
+ assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6);
+ assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6);
int numParams = net.numParams();
assertEquals(Nd4j.linspace(1, numParams, numParams), net.params());
@@ -93,10 +92,9 @@ public void regressionTestMLP2() throws Exception {
assertEquals(4, l0.getNOut());
assertEquals(WeightInit.DISTRIBUTION, l0.getWeightInit());
assertEquals(new NormalDistribution(0.1, 1.2), l0.getDist());
- assertEquals(Updater.RMSPROP, l0.getUpdater());
- assertEquals(0.96, l0.getRmsDecay(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
- assertEquals(0.6, l0.getDropOut(), 1e-6);
+ assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater());
+ assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6);
+ assertEquals(new Dropout(0.6), l0.getIDropout());
assertEquals(0.1, l0.getL1(), 1e-6);
assertEquals(0.2, l0.getL2(), 1e-6);
@@ -108,10 +106,9 @@ public void regressionTestMLP2() throws Exception {
assertEquals(5, l1.getNOut());
assertEquals(WeightInit.DISTRIBUTION, l0.getWeightInit());
assertEquals(new NormalDistribution(0.1, 1.2), l0.getDist());
- assertEquals(Updater.RMSPROP, l0.getUpdater());
- assertEquals(0.96, l1.getRmsDecay(), 1e-6);
- assertEquals(0.15, l1.getLearningRate(), 1e-6);
- assertEquals(0.6, l1.getDropOut(), 1e-6);
+ assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l1.getIUpdater());
+ assertEquals(0.15, ((RmsProp)l1.getIUpdater()).getLearningRate(), 1e-6);
+ assertEquals(new Dropout(0.6), l1.getIDropout());
assertEquals(0.1, l1.getL1(), 1e-6);
assertEquals(0.2, l1.getL2(), 1e-6);
@@ -140,9 +137,8 @@ public void regressionTestCNN1() throws Exception {
assertEquals(3, l0.getNIn());
assertEquals(3, l0.getNOut());
assertEquals(WeightInit.RELU, l0.getWeightInit());
- assertEquals(Updater.RMSPROP, l0.getUpdater());
- assertEquals(0.96, l0.getRmsDecay(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
+ assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater());
+ assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6);
assertArrayEquals(new int[] {2, 2}, l0.getKernelSize());
assertArrayEquals(new int[] {1, 1}, l0.getStride());
assertArrayEquals(new int[] {0, 0}, l0.getPadding());
@@ -162,9 +158,8 @@ public void regressionTestCNN1() throws Exception {
assertEquals(26 * 26 * 3, l2.getNIn());
assertEquals(5, l2.getNOut());
assertEquals(WeightInit.RELU, l0.getWeightInit());
- assertEquals(Updater.RMSPROP, l0.getUpdater());
- assertEquals(0.96, l0.getRmsDecay(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
+ assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater());
+ assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6);
int numParams = net.numParams();
assertEquals(Nd4j.linspace(1, numParams, numParams), net.params());
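
The regression-test assertions shift from scalar getters to value-object equality: updater settings are compared against a fully specified IUpdater (whose equals() compares the configured fields, including rate, decay and epsilon), and the old scalar dropOut is wrapped in the Dropout class and read back via getIDropout(). The resulting idiom, as used in this and the following three regression-test files:

    assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater());
    assertEquals(0.15, ((RmsProp) l0.getIUpdater()).getLearningRate(), 1e-6);
    assertEquals(new Dropout(0.6), l0.getIDropout());
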
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java
index 7bf27d3d0f75..bcd46cf56ca0 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java
@@ -1,7 +1,11 @@
package org.deeplearning4j.regressiontest;
-import org.deeplearning4j.nn.conf.*;
+import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
+import org.deeplearning4j.nn.conf.ConvolutionMode;
+import org.deeplearning4j.nn.conf.GradientNormalization;
+import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.graph.LayerVertex;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor;
@@ -53,9 +57,8 @@ public void regressionTestMLP1() throws Exception {
assertEquals(3, l0.getNIn());
assertEquals(4, l0.getNOut());
assertEquals(WeightInit.XAVIER, l0.getWeightInit());
- assertEquals(Updater.NESTEROVS, l0.getUpdater());
- assertEquals(0.9, l0.getMomentum(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
+ assertEquals(new Nesterovs(0.15, 0.9), l0.getIUpdater());
+ assertEquals(0.15, ((Nesterovs)l0.getIUpdater()).getLearningRate(), 1e-6);
OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer();
assertEquals("softmax", l1.getActivationFn().toString());
@@ -64,9 +67,9 @@ public void regressionTestMLP1() throws Exception {
assertEquals(4, l1.getNIn());
assertEquals(5, l1.getNOut());
assertEquals(WeightInit.XAVIER, l1.getWeightInit());
- assertEquals(Updater.NESTEROVS, l1.getUpdater());
- assertEquals(0.9, l1.getMomentum(), 1e-6);
- assertEquals(0.15, l1.getLearningRate(), 1e-6);
+ assertEquals(new Nesterovs(0.15, 0.9), l1.getIUpdater());
+ assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6);
+ assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6);
int numParams = net.numParams();
assertEquals(Nd4j.linspace(1, numParams, numParams), net.params());
@@ -94,10 +97,9 @@ public void regressionTestMLP2() throws Exception {
assertEquals(4, l0.getNOut());
assertEquals(WeightInit.DISTRIBUTION, l0.getWeightInit());
assertEquals(new NormalDistribution(0.1, 1.2), l0.getDist());
- assertEquals(Updater.RMSPROP, l0.getUpdater());
- assertEquals(0.96, l0.getRmsDecay(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
- assertEquals(0.6, l0.getDropOut(), 1e-6);
+ assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater());
+ assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6);
+ assertEquals(new Dropout(0.6), l0.getIDropout());
assertEquals(0.1, l0.getL1(), 1e-6);
assertEquals(0.2, l0.getL2(), 1e-6);
assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l0.getGradientNormalization());
@@ -111,10 +113,9 @@ public void regressionTestMLP2() throws Exception {
assertEquals(5, l1.getNOut());
assertEquals(WeightInit.DISTRIBUTION, l0.getWeightInit());
assertEquals(new NormalDistribution(0.1, 1.2), l0.getDist());
- assertEquals(Updater.RMSPROP, l0.getUpdater());
- assertEquals(0.96, l1.getRmsDecay(), 1e-6);
- assertEquals(0.15, l1.getLearningRate(), 1e-6);
- assertEquals(0.6, l1.getDropOut(), 1e-6);
+ assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l1.getIUpdater());
+ assertEquals(0.15, ((RmsProp)l1.getIUpdater()).getLearningRate(), 1e-6);
+ assertEquals(new Dropout(0.6), l1.getIDropout());
assertEquals(0.1, l1.getL1(), 1e-6);
assertEquals(0.2, l1.getL2(), 1e-6);
assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l1.getGradientNormalization());
@@ -145,9 +146,8 @@ public void regressionTestCNN1() throws Exception {
assertEquals(3, l0.getNIn());
assertEquals(3, l0.getNOut());
assertEquals(WeightInit.RELU, l0.getWeightInit());
- assertEquals(Updater.RMSPROP, l0.getUpdater());
- assertEquals(0.96, l0.getRmsDecay(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
+ assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater());
+ assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6);
assertArrayEquals(new int[] {2, 2}, l0.getKernelSize());
assertArrayEquals(new int[] {1, 1}, l0.getStride());
assertArrayEquals(new int[] {0, 0}, l0.getPadding());
@@ -167,9 +167,8 @@ public void regressionTestCNN1() throws Exception {
assertEquals(26 * 26 * 3, l2.getNIn());
assertEquals(5, l2.getNOut());
assertEquals(WeightInit.RELU, l0.getWeightInit());
- assertEquals(Updater.RMSPROP, l0.getUpdater());
- assertEquals(0.96, l0.getRmsDecay(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
+ assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater());
+ assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6);
assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor);
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java
index 34b6d89e1fcd..b2a361cec589 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java
@@ -1,7 +1,11 @@
package org.deeplearning4j.regressiontest;
-import org.deeplearning4j.nn.conf.*;
+import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
+import org.deeplearning4j.nn.conf.ConvolutionMode;
+import org.deeplearning4j.nn.conf.GradientNormalization;
+import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.graph.LayerVertex;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor;
@@ -54,9 +58,8 @@ public void regressionTestMLP1() throws Exception {
assertEquals(3, l0.getNIn());
assertEquals(4, l0.getNOut());
assertEquals(WeightInit.XAVIER, l0.getWeightInit());
- assertEquals(Updater.NESTEROVS, l0.getUpdater());
- assertEquals(0.9, l0.getMomentum(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
+ assertEquals(new Nesterovs(0.15, 0.9), l0.getIUpdater());
+ assertEquals(0.15, ((Nesterovs)l0.getIUpdater()).getLearningRate(), 1e-6);
OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer();
assertEquals("softmax", l1.getActivationFn().toString());
@@ -65,9 +68,9 @@ public void regressionTestMLP1() throws Exception {
assertEquals(4, l1.getNIn());
assertEquals(5, l1.getNOut());
assertEquals(WeightInit.XAVIER, l1.getWeightInit());
- assertEquals(Updater.NESTEROVS, l1.getUpdater());
- assertEquals(0.9, l1.getMomentum(), 1e-6);
- assertEquals(0.15, l1.getLearningRate(), 1e-6);
+ assertEquals(new Nesterovs(0.15, 0.9), l1.getIUpdater());
+ assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6);
+ assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6);
int numParams = net.numParams();
assertEquals(Nd4j.linspace(1, numParams, numParams), net.params());
@@ -95,10 +98,9 @@ public void regressionTestMLP2() throws Exception {
assertEquals(4, l0.getNOut());
assertEquals(WeightInit.DISTRIBUTION, l0.getWeightInit());
assertEquals(new NormalDistribution(0.1, 1.2), l0.getDist());
- assertEquals(Updater.RMSPROP, l0.getUpdater());
- assertEquals(0.96, l0.getRmsDecay(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
- assertEquals(0.6, l0.getDropOut(), 1e-6);
+ assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater());
+ assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6);
+ assertEquals(new Dropout(0.6), l0.getIDropout());
assertEquals(0.1, l0.getL1(), 1e-6);
assertEquals(0.2, l0.getL2(), 1e-6);
assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l0.getGradientNormalization());
@@ -112,10 +114,9 @@ public void regressionTestMLP2() throws Exception {
assertEquals(5, l1.getNOut());
assertEquals(WeightInit.DISTRIBUTION, l0.getWeightInit());
assertEquals(new NormalDistribution(0.1, 1.2), l0.getDist());
- assertEquals(Updater.RMSPROP, l0.getUpdater());
- assertEquals(0.96, l1.getRmsDecay(), 1e-6);
- assertEquals(0.15, l1.getLearningRate(), 1e-6);
- assertEquals(0.6, l1.getDropOut(), 1e-6);
+ assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l1.getIUpdater());
+ assertEquals(0.15, ((RmsProp)l1.getIUpdater()).getLearningRate(), 1e-6);
+ assertEquals(new Dropout(0.6), l1.getIDropout());
assertEquals(0.1, l1.getL1(), 1e-6);
assertEquals(0.2, l1.getL2(), 1e-6);
assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l1.getGradientNormalization());
@@ -146,9 +147,8 @@ public void regressionTestCNN1() throws Exception {
assertEquals(3, l0.getNIn());
assertEquals(3, l0.getNOut());
assertEquals(WeightInit.RELU, l0.getWeightInit());
- assertEquals(Updater.RMSPROP, l0.getUpdater());
- assertEquals(0.96, l0.getRmsDecay(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
+ assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater());
+ assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6);
assertArrayEquals(new int[] {2, 2}, l0.getKernelSize());
assertArrayEquals(new int[] {1, 1}, l0.getStride());
assertArrayEquals(new int[] {0, 0}, l0.getPadding());
@@ -168,9 +168,8 @@ public void regressionTestCNN1() throws Exception {
assertEquals(26 * 26 * 3, l2.getNIn());
assertEquals(5, l2.getNOut());
assertEquals(WeightInit.RELU, l0.getWeightInit());
- assertEquals(Updater.RMSPROP, l0.getUpdater());
- assertEquals(0.96, l0.getRmsDecay(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
+ assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater());
+ assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6);
assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor);
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java
index 00bb3b80d83b..f097576c5409 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java
@@ -5,6 +5,7 @@
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.graph.LayerVertex;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor;
@@ -58,7 +59,7 @@ public void regressionTestMLP1() throws Exception {
assertTrue(l0.getIUpdater() instanceof Nesterovs);
Nesterovs n = (Nesterovs) l0.getIUpdater();
assertEquals(0.9, n.getMomentum(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
+ assertEquals(0.15, ((Nesterovs)l0.getIUpdater()).getLearningRate(), 1e-6);
assertEquals(0.15, n.getLearningRate(), 1e-6);
@@ -69,9 +70,8 @@ public void regressionTestMLP1() throws Exception {
assertEquals(5, l1.getNOut());
assertEquals(WeightInit.XAVIER, l1.getWeightInit());
assertTrue(l1.getIUpdater() instanceof Nesterovs);
- n = (Nesterovs) l1.getIUpdater();
- assertEquals(0.9, n.getMomentum(), 1e-6);
- assertEquals(0.15, l1.getLearningRate(), 1e-6);
+ assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6);
+ assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6);
assertEquals(0.15, n.getLearningRate(), 1e-6);
int numParams = net.numParams();
@@ -104,8 +104,8 @@ public void regressionTestMLP2() throws Exception {
RmsProp r = (RmsProp) l0.getIUpdater();
assertEquals(0.96, r.getRmsDecay(), 1e-6);
assertEquals(0.15, r.getLearningRate(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
- assertEquals(0.6, l0.getDropOut(), 1e-6);
+ assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6);
+ assertEquals(new Dropout(0.6), l0.getIDropout());
assertEquals(0.1, l0.getL1(), 1e-6);
assertEquals(0.2, l0.getL2(), 1e-6);
assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l0.getGradientNormalization());
@@ -122,8 +122,8 @@ public void regressionTestMLP2() throws Exception {
r = (RmsProp) l1.getIUpdater();
assertEquals(0.96, r.getRmsDecay(), 1e-6);
assertEquals(0.15, r.getLearningRate(), 1e-6);
- assertEquals(0.15, l1.getLearningRate(), 1e-6);
- assertEquals(0.6, l1.getDropOut(), 1e-6);
+ assertEquals(0.15, ((RmsProp)l1.getIUpdater()).getLearningRate(), 1e-6);
+ assertEquals(new Dropout(0.6), l1.getIDropout());
assertEquals(0.1, l1.getL1(), 1e-6);
assertEquals(0.2, l1.getL2(), 1e-6);
assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l1.getGradientNormalization());
@@ -158,7 +158,7 @@ public void regressionTestCNN1() throws Exception {
RmsProp r = (RmsProp) l0.getIUpdater();
assertEquals(0.96, r.getRmsDecay(), 1e-6);
assertEquals(0.15, r.getLearningRate(), 1e-6);
- assertEquals(0.15, l0.getLearningRate(), 1e-6);
+ assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6);
assertArrayEquals(new int[] {2, 2}, l0.getKernelSize());
assertArrayEquals(new int[] {1, 1}, l0.getStride());
assertArrayEquals(new int[] {0, 0}, l0.getPadding());
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java
index 3e1452e96faf..6f5856d20041 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java
@@ -16,6 +16,7 @@
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.io.ClassPathResource;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.io.BufferedOutputStream;
@@ -58,8 +59,8 @@ public void testLoadNormalizers() throws Exception {
int nIn = 5;
int nOut = 6;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l1(0.01)
- .l2(0.01).learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01).l2(0.01)
+ .updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build())
.layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder()
.lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build())
@@ -89,8 +90,8 @@ public void testModelGuesserDl4jModel() throws Exception {
int nIn = 5;
int nOut = 6;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l1(0.01)
- .l2(0.01).learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01)
+ .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build())
.layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder()
.lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build())
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java
index 1489fc681d80..b3acbd24ceb2 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java
@@ -18,6 +18,7 @@
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.io.File;
@@ -37,8 +38,8 @@ public void testWriteMLNModel() throws Exception {
int nIn = 5;
int nOut = 6;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l1(0.01)
- .l2(0.01).learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01)
+ .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build())
.layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder()
.lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build())
@@ -64,8 +65,8 @@ public void testWriteMlnModelInputStream() throws Exception {
int nIn = 5;
int nOut = 6;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l1(0.01)
- .l2(0.01).learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01)
+ .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list()
.layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build())
.layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder()
.lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build())
@@ -108,7 +109,7 @@ public void testWriteMlnModelInputStream() throws Exception {
@Test
public void testWriteCGModel() throws Exception {
ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.1)
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1))
.graphBuilder().addInputs("in")
.addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out",
new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3)
@@ -134,7 +135,7 @@ public void testWriteCGModel() throws Exception {
@Test
public void testWriteCGModelInputStream() throws Exception {
ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.1)
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1))
.graphBuilder().addInputs("in")
.addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out",
new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3)
@@ -166,7 +167,7 @@ private DataSet trivialDataSet() {
private ComputationGraph simpleComputationGraph() {
ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.1)
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1))
.graphBuilder().addInputs("in")
.addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out",
new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3)
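
For reference, the ModelSerializer API these tests exercise round-trips a network (and optionally its updater state) through a single file. A short sketch, assuming a net built from a configuration like the one above; the file path is hypothetical:

    import java.io.File;
    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.deeplearning4j.util.ModelSerializer;

    MultiLayerNetwork net = new MultiLayerNetwork(conf); // conf: see sketch above
    net.init();
    File f = new File("model.zip");                      // hypothetical location
    ModelSerializer.writeModel(net, f, true);            // true: also save updater state
    MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(f);
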
diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java
index 0daf03d8b3c2..9acc3f5386a3 100644
--- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java
+++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java
@@ -19,7 +19,6 @@
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacpp.Pointer;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer.AlgoMode;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer.BwdDataAlgo;
@@ -41,6 +40,7 @@
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.jcublas.context.CudaContext;
+import org.nd4j.linalg.primitives.Pair;
import static org.bytedeco.javacpp.cuda.CUstream_st;
import static org.bytedeco.javacpp.cudnn.*;
diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/CudnnSubsamplingHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/CudnnSubsamplingHelper.java
index 348657b7a81d..fdb8bde0e74b 100644
--- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/CudnnSubsamplingHelper.java
+++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/CudnnSubsamplingHelper.java
@@ -19,7 +19,6 @@
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacpp.Pointer;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.nn.gradient.DefaultGradient;
@@ -36,6 +35,7 @@
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.jcublas.context.CudaContext;
+import org.nd4j.linalg.primitives.Pair;
import static org.bytedeco.javacpp.cuda.CUstream_st;
import static org.bytedeco.javacpp.cudnn.*;
diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnBatchNormalizationHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnBatchNormalizationHelper.java
index 3596f2a70be2..088688186736 100644
--- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnBatchNormalizationHelper.java
+++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnBatchNormalizationHelper.java
@@ -19,7 +19,6 @@
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacpp.Pointer;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.BaseCudnnHelper;
@@ -32,6 +31,7 @@
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.jcublas.context.CudaContext;
+import org.nd4j.linalg.primitives.Pair;
import static org.bytedeco.javacpp.cuda.CUstream_st;
import static org.bytedeco.javacpp.cudnn.*;
diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnLocalResponseNormalizationHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnLocalResponseNormalizationHelper.java
index 813d567bb613..cf292aaa8248 100644
--- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnLocalResponseNormalizationHelper.java
+++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnLocalResponseNormalizationHelper.java
@@ -19,7 +19,6 @@
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacpp.Pointer;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.BaseCudnnHelper;
@@ -31,6 +30,7 @@
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.jcublas.context.CudaContext;
+import org.nd4j.linalg.primitives.Pair;
import static org.bytedeco.javacpp.cuda.CUstream_st;
import static org.bytedeco.javacpp.cudnn.*;
diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/recurrent/CudnnLSTMHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/recurrent/CudnnLSTMHelper.java
index 80b8308e0859..02ab6f2c330f 100644
--- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/recurrent/CudnnLSTMHelper.java
+++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/recurrent/CudnnLSTMHelper.java
@@ -19,7 +19,6 @@
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacpp.Pointer;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
@@ -34,6 +33,7 @@
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.jcublas.context.CudaContext;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Map;
diff --git a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/convolution/TestConvolution.java b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/convolution/TestConvolution.java
index 074fb0c9d041..6f60ac5e33da 100644
--- a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/convolution/TestConvolution.java
+++ b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/convolution/TestConvolution.java
@@ -1,6 +1,5 @@
package org.deeplearning4j.convolution;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
@@ -16,7 +15,9 @@
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
+import org.nd4j.linalg.primitives.Pair;
import java.lang.reflect.Field;
import java.util.Arrays;
@@ -83,8 +84,8 @@ public void testCompareCudnnStandardOutputsVsMode() throws Exception {
l = new SubsamplingLayer.Builder().kernelSize(4, 4).stride(2, 2).build();
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).regularization(true)
- .l2(0.0005).learningRate(.01).weightInit(WeightInit.XAVIER).convolutionMode(c).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
+ .l2(0.0005).updater(new Sgd(0.01)).weightInit(WeightInit.XAVIER).convolutionMode(c).list()
.layer(0, l)
.layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
.nOut(10).activation(Activation.SOFTMAX).build())
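
The convolutionMode(c) setting this test loops over controls how output spatial dimensions are computed. A sketch of pinning the mode on a single layer (overriding the network-level default), assuming the standard builder API:

    import org.deeplearning4j.nn.conf.ConvolutionMode;
    import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;

    // Same:     out = ceil(in / stride), padding computed automatically
    // Truncate: out = (in - kernel + 2*padding) / stride + 1 (integer division)
    ConvolutionLayer conv = new ConvolutionLayer.Builder(4, 4)
            .stride(2, 2)
            .convolutionMode(ConvolutionMode.Same)
            .nOut(3)
            .build();
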
diff --git a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CuDNNGradientChecks.java b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CuDNNGradientChecks.java
index e691dcc11868..9854beba2a83 100644
--- a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CuDNNGradientChecks.java
+++ b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CuDNNGradientChecks.java
@@ -4,7 +4,6 @@
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
@@ -26,6 +25,7 @@
import org.nd4j.linalg.api.buffer.util.DataTypeUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.lang.reflect.Field;
@@ -56,7 +56,7 @@ public void testConvolutional() throws Exception {
// (a) activation function
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
// (c) Loss function (with specified output activations)
- String[] activFns = {"sigmoid", "tanh"};
+ Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
int[] minibatchSizes = {1, 4};
@@ -69,7 +69,7 @@ public void testConvolutional() throws Exception {
f.setAccessible(true);
Random r = new Random(12345);
- for (String afn : activFns) {
+ for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int minibatchSize : minibatchSizes) {
@@ -79,10 +79,10 @@ public void testConvolutional() throws Exception {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
- MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().regularization(false)
+ MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
.weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-1, 1))
- .updater(Updater.NONE).seed(12345L).list()
+ .updater(new NoOp()).seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(1, 1).nOut(3)
.activation(afn).build())
.layer(1, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(0, 0).nOut(3)
@@ -172,9 +172,9 @@ public void testConvolutionalNoBias() throws Exception {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
- MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().regularization(false)
+ MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-1, 1))
- .updater(Updater.NONE).seed(12345L)
+ .updater(new NoOp()).seed(12345L)
.list()
.layer(0, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(1, 1).nOut(3)
.hasBias(convHasBias)
@@ -235,8 +235,8 @@ public void testBatchNormCnn() throws Exception {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
- MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0)
- .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
+ .seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
.activation(Activation.IDENTITY).build())
@@ -289,8 +289,8 @@ public void testLRN() throws Exception {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
- MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0)
- .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
+ .seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new ConvolutionLayer.Builder().nOut(6).kernelSize(2, 2).stride(1, 1)
.activation(Activation.TANH).build())
@@ -346,8 +346,8 @@ public void testLSTM() throws Exception {
}
}
- MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0)
- .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp()).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new LSTM.Builder().nIn(input.size(1)).nOut(lstmLayerSize)
.gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
@@ -403,8 +403,8 @@ public void testLSTM2() throws Exception {
}
}
- MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0)
- .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
+ MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
+ .updater(new NoOp()).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new LSTM.Builder().nIn(input.size(1)).nOut(lstmLayerSize)
.gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
@@ -480,7 +480,7 @@ public void testCnnDilated() throws Exception {
}
NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345)
- .learningRate(1.0).updater(Updater.SGD)
+ .updater(new NoOp())
.activation(Activation.TANH).convolutionMode(cm).list()
.layer(new ConvolutionLayer.Builder().name("layer 0")
.kernelSize(k, k)
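
The switch from the Updater.NONE / Updater.SGD enum values to new NoOp() above matters for these tests: a gradient check compares analytic gradients against central finite differences, so the updater must not rescale or otherwise modify the parameters. A sketch of the check itself, assuming the GradientCheckUtil signature these tests use; net, input and labels are placeholders:

    import org.deeplearning4j.gradientcheck.GradientCheckUtil;

    boolean gradOK = GradientCheckUtil.checkGradients(net,
            1e-6,   // epsilon for finite differences
            1e-3,   // max relative error
            1e-8,   // min absolute error
            true,   // print results
            false,  // don't exit on first error
            input, labels);
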
diff --git a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/lstm/ValidateCudnnLSTM.java b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/lstm/ValidateCudnnLSTM.java
index 359ed0422a8e..c40b2c0c0534 100644
--- a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/lstm/ValidateCudnnLSTM.java
+++ b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/lstm/ValidateCudnnLSTM.java
@@ -14,6 +14,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.lang.reflect.Field;
@@ -46,8 +47,8 @@ public void validateImplSimple() throws Exception {
}
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().inferenceWorkspaceMode(WorkspaceMode.NONE)
- .trainingWorkspaceMode(WorkspaceMode.NONE).learningRate(1.0).regularization(false)
- .updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
+ .trainingWorkspaceMode(WorkspaceMode.NONE).updater(new NoOp())
+ .seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new LSTM.Builder().nIn(input.size(1)).nOut(lstmLayerSize)
.gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
@@ -123,9 +124,9 @@ public void validateImplMultiLayer() throws Exception {
}
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp())
.inferenceWorkspaceMode(WorkspaceMode.NONE).trainingWorkspaceMode(WorkspaceMode.NONE)
- .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
+ .seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new LSTM.Builder().nIn(input.size(1)).nOut(lstmLayerSize)
.gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
@@ -207,9 +208,9 @@ public void validateImplMultiLayerTBPTT() throws Exception {
int tbpttLength = 5;
int nOut = 2;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp())
.inferenceWorkspaceMode(WorkspaceMode.NONE).trainingWorkspaceMode(WorkspaceMode.NONE)
- .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION)
+ .seed(12345L).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new LSTM.Builder().nIn(inputSize).nOut(lstmLayerSize)
.gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
@@ -274,9 +275,9 @@ public void validateImplMultiLayerRnnTimeStep() throws Exception {
int tbpttLength = 5;
int nOut = 2;
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(1.0)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp())
.inferenceWorkspaceMode(WorkspaceMode.NONE).trainingWorkspaceMode(WorkspaceMode.NONE)
- .cacheMode(CacheMode.NONE).regularization(false).updater(Updater.NONE).seed(12345L)
+ .cacheMode(CacheMode.NONE).seed(12345L)
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 2)).list()
.layer(0, new LSTM.Builder().nIn(inputSize).nOut(lstmLayerSize)
.gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
diff --git a/deeplearning4j-graph/src/main/java/org/deeplearning4j/graph/models/embeddings/GraphVectorsImpl.java b/deeplearning4j-graph/src/main/java/org/deeplearning4j/graph/models/embeddings/GraphVectorsImpl.java
index ec4cf5bbe04f..ac90ab0028dd 100644
--- a/deeplearning4j-graph/src/main/java/org/deeplearning4j/graph/models/embeddings/GraphVectorsImpl.java
+++ b/deeplearning4j-graph/src/main/java/org/deeplearning4j/graph/models/embeddings/GraphVectorsImpl.java
@@ -2,7 +2,6 @@
import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.graph.api.IGraph;
import org.deeplearning4j.graph.api.Vertex;
import org.deeplearning4j.graph.models.GraphVectors;
@@ -10,6 +9,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Comparator;
import java.util.PriorityQueue;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/Hdf5Archive.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/Hdf5Archive.java
index a8f9cd869501..765a837f55a1 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/Hdf5Archive.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/Hdf5Archive.java
@@ -30,13 +30,12 @@
import org.nd4j.shade.jackson.databind.ObjectMapper;
import java.io.IOException;
+import java.lang.Exception;
import java.util.ArrayList;
import java.util.List;
import static org.bytedeco.javacpp.hdf5.*;
-import java.lang.Exception;
-
/**
* Class for reading ND4J arrays and JSON strings from HDF5
 * archive files.
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java
index b054ba8a176d..f776fa99a7db 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java
@@ -27,7 +27,6 @@
import org.deeplearning4j.nn.conf.graph.PreprocessorVertex;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.graph.ComputationGraph;
-
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasModelConfiguration;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
@@ -40,7 +39,10 @@
import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils;
import java.io.IOException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import static org.deeplearning4j.nn.modelimport.keras.KerasLayer.DimOrder;
import static org.deeplearning4j.nn.modelimport.keras.KerasLayer.customLayers;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java
index 37e77177eb0d..12c44eefa78a 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java
@@ -28,12 +28,15 @@
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.layers.KerasInput;
-import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelBuilder;
+import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import java.io.IOException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
/**
* Build DL4J MultiLayerNetwork model from Keras Sequential
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java
index ab418844da64..e4bd4033f3ea 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java
@@ -67,6 +67,7 @@ public class KerasLayerConfiguration {
private final String LAYER_CLASS_NAME_CONVOLUTION_1D = ""; // 1: Convolution1D, 2: Conv1D
private final String LAYER_CLASS_NAME_CONVOLUTION_2D = ""; // 1: Convolution2D, 2: Conv2D
private final String LAYER_CLASS_NAME_LEAKY_RELU = "LeakyReLU";
+ private final String LAYER_CLASS_NAME_UPSAMPLING_1D = "UpSampling1D";
private final String LAYER_CLASS_NAME_UPSAMPLING_2D = "UpSampling2D";
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasInput.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasInput.java
index 67584d4b7e03..b4c51eff13c8 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasInput.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasInput.java
@@ -3,8 +3,8 @@
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.inputs.InputType;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import java.util.ArrayList;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java
index 59125b1bf68c..e2a4e28d526f 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java
@@ -4,13 +4,12 @@
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.LossLayer;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.ArrayList;
-import java.util.Map;
import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLossUtils.mapLossFunction;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasLeakyReLU.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasLeakyReLU.java
index 5fc1b8dc71bb..5da8ab74c585 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasLeakyReLU.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasLeakyReLU.java
@@ -26,7 +26,6 @@
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.activations.impl.ActivationLReLU;
-
import java.util.Map;
/**
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java
index 9377216622d1..1d35c3d49a50 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java
@@ -20,10 +20,8 @@
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ArrayUtils;
-import org.deeplearning4j.nn.conf.inputs.InputType;
-import org.deeplearning4j.nn.conf.layers.Convolution1DLayer;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
import org.nd4j.linalg.api.ndarray.INDArray;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling1D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling1D.java
new file mode 100644
index 000000000000..bf3887b98ec0
--- /dev/null
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling1D.java
@@ -0,0 +1,95 @@
+/*-
+ *
+ * * Copyright 2017 Skymind,Inc.
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+package org.deeplearning4j.nn.modelimport.keras.layers.convolutional;
+
+import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.Upsampling1D;
+import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
+
+import java.util.Map;
+
+
+/**
+ * Imports a Keras UpSampling1D layer as a DL4J Upsampling1D layer.
+ *
+ * @author Max Pumperla
+ */
+public class KerasUpsampling1D extends KerasLayer {
+
+ /**
+ * Constructor from parsed Keras layer configuration dictionary.
+ *
+ * @param layerConfig dictionary containing Keras layer configuration.
+ * @throws InvalidKerasConfigurationException Invalid Keras configuration exception
+ * @throws UnsupportedKerasConfigurationException Unsupported Keras configuration exception
+ */
+    public KerasUpsampling1D(Map<String, Object> layerConfig)
+ throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException {
+ this(layerConfig, true);
+ }
+
+ /**
+ * Constructor from parsed Keras layer configuration dictionary.
+ *
+ * @param layerConfig dictionary containing Keras layer configuration
+ * @param enforceTrainingConfig whether to enforce training-related configuration options
+ * @throws InvalidKerasConfigurationException Invalid Keras configuration exception
+     * @throws UnsupportedKerasConfigurationException Unsupported Keras configuration exception
+ */
+    public KerasUpsampling1D(Map<String, Object> layerConfig, boolean enforceTrainingConfig)
+ throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException {
+ super(layerConfig, enforceTrainingConfig);
+
+ int[] size = KerasConvolutionUtils.getUpsamplingSizeFromConfig(layerConfig, 1, conf);
+
+ Upsampling1D.Builder builder = new Upsampling1D.Builder()
+ .name(this.layerName)
+ .dropOut(this.dropout)
+ .size(size[0]);
+
+ this.layer = builder.build();
+ this.vertex = null;
+ }
+
+ /**
+ * Get DL4J Upsampling1D layer.
+ *
+ * @return Upsampling1D layer
+ */
+ public Upsampling1D getUpsampling1DLayer() {
+ return (Upsampling1D) this.layer;
+ }
+
+ /**
+ * Get layer output type.
+ *
+ * @param inputType Array of InputTypes
+ * @return output type as InputType
+     * @throws InvalidKerasConfigurationException Invalid Keras configuration exception
+ */
+ @Override
+ public InputType getOutputType(InputType... inputType) throws InvalidKerasConfigurationException {
+        if (inputType.length > 1)
+            throw new InvalidKerasConfigurationException(
+                    "Keras Upsampling1D layer accepts only one input (received " + inputType.length + ")");
+ return this.getUpsampling1DLayer().getOutputType(-1, inputType[0]);
+ }
+
+}
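
For context on the new import layer above: DL4J's Upsampling1D repeats each time step size times along the sequence axis, matching Keras UpSampling1D(size=...). A minimal sketch using the same builder the constructor calls:

    import org.deeplearning4j.nn.conf.layers.Upsampling1D;

    // Input [minibatch, channels, length] -> output [minibatch, channels, size * length]
    Upsampling1D up = new Upsampling1D.Builder()
            .size(2)    // matches Keras UpSampling1D(size=2)
            .build();
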
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java
index 5679b47acf71..1a187ec89611 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java
@@ -8,7 +8,6 @@
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
-
import java.util.Map;
import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getZeroPaddingFromConfig;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java
index d5a97d093257..5477179d02c9 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java
@@ -4,8 +4,8 @@
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ZeroPaddingLayer;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import java.util.Map;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivation.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivation.java
index f485986a9b77..4e4ebc719311 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivation.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivation.java
@@ -3,13 +3,14 @@
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ActivationLayer;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
-import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.getActivationFromConfig;
import java.util.Map;
+import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.getActivationFromConfig;
+
/**
* Imports an Activation layer from Keras.
*
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java
index 16011b37deb7..564bb0252973 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java
@@ -6,8 +6,8 @@
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
@@ -17,10 +17,10 @@
import java.util.Map;
import java.util.Set;
+import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.getActivationFromConfig;
import static org.deeplearning4j.nn.modelimport.keras.utils.KerasInitilizationUtils.getWeightInitFromConfig;
import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils.getHasBiasFromConfig;
import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils.getNOutFromConfig;
-import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.getActivationFromConfig;
/**
* Imports a Dense layer from Keras.
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropout.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropout.java
index bbc6c35f8adf..15dd41aa609c 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropout.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropout.java
@@ -3,8 +3,8 @@
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.DropoutLayer;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import java.util.Map;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasFlatten.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasFlatten.java
index f03fe2e07266..a1a81f1e3f08 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasFlatten.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasFlatten.java
@@ -6,8 +6,8 @@
import org.deeplearning4j.nn.conf.inputs.InputType.InputTypeConvolutional;
import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor;
import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.TensorFlowCnnToFeedForwardPreProcessor;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMerge.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMerge.java
index e181c8da6b8e..c2a133a95948 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMerge.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMerge.java
@@ -5,8 +5,8 @@
import org.deeplearning4j.nn.conf.graph.ElementWiseVertex;
import org.deeplearning4j.nn.conf.graph.MergeVertex;
import org.deeplearning4j.nn.conf.inputs.InputType;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshape.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshape.java
index a149016f4d82..c53620f9fe46 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshape.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshape.java
@@ -27,7 +27,6 @@
import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils;
import org.nd4j.linalg.util.ArrayUtil;
-
import java.util.List;
import java.util.Map;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java
index a8c428e10e59..f918e2ef0c57 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java
@@ -3,8 +3,8 @@
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.LocalResponseNormalization;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasPoolHelper.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasPoolHelper.java
index 94f467132e89..20dec069a3f9 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasPoolHelper.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasPoolHelper.java
@@ -3,8 +3,8 @@
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.graph.PoolHelperVertex;
import org.deeplearning4j.nn.conf.inputs.InputType;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import java.util.Map;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java
index ab1d1abf5903..8d993342b85d 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java
@@ -6,15 +6,14 @@
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.EmbeddingLayer;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
-import org.nd4j.linalg.factory.Nd4j;
import java.util.HashMap;
import java.util.Map;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java
index d3b19feb003c..61e1b3770976 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java
@@ -5,8 +5,8 @@
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.BatchNormalization;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils;
@@ -93,8 +93,7 @@ public KerasBatchNormalization(Map layerConfig, boolean enforceT
layerConfig, conf.getLAYER_FIELD_BATCHNORMALIZATION_GAMMA_CONSTRAINT(), conf, kerasMajorVersion);
BatchNormalization.Builder builder = new BatchNormalization.Builder().name(this.layerName).dropOut(this.dropout).minibatch(true)
- .lockGammaBeta(false).eps(getEpsFromConfig(layerConfig))
- .momentum(getMomentumFromConfig(layerConfig));
+ .lockGammaBeta(false).eps(getEpsFromConfig(layerConfig));
if (betaConstraint != null)
builder.constrainBeta(betaConstraint);
if (gammaConstraint != null)
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java
index 54598ccec4da..c1c8cb78531b 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java
@@ -23,8 +23,8 @@
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer;
import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import java.util.Map;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java
index 666439c72cce..7105c1034076 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java
@@ -20,17 +20,14 @@
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.Subsampling1DLayer;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
-import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getConvolutionModeFromConfig;
-import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getKernelSizeFromConfig;
-import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getStrideFromConfig;
-import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getPaddingFromBorderModeConfig;
-
import java.util.Map;
+import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.*;
+
/**
* Imports a Keras 1D Pooling layer as a DL4J Subsampling layer.
*
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java
index a65b85d8d0ea..93ec62865e1c 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java
@@ -20,17 +20,14 @@
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
-import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getConvolutionModeFromConfig;
-import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getKernelSizeFromConfig;
-import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getStrideFromConfig;
-import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getPaddingFromBorderModeConfig;
-
import java.util.Map;
+import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.*;
+
/**
* Imports a Keras 2D Pooling layer as a DL4J Subsampling layer.
*
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLstm.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLstm.java
index 6168b6be5731..384cbd84136c 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLstm.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLstm.java
@@ -3,10 +3,12 @@
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
+import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.LSTM;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
+import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils;
@@ -22,10 +24,10 @@
import java.util.Map;
import java.util.Set;
+import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.getActivationFromConfig;
import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.mapActivation;
import static org.deeplearning4j.nn.modelimport.keras.utils.KerasInitilizationUtils.getWeightInitFromConfig;
import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils.getNOutFromConfig;
-import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.getActivationFromConfig;
/**
* Imports a Keras LSTM layer as a DL4J LSTM layer.
@@ -163,7 +165,11 @@ public InputType getOutputType(InputType... inputType) throws InvalidKerasConfig
if (inputType.length > 1)
throw new InvalidKerasConfigurationException(
"Keras LSTM layer accepts only one input (received " + inputType.length + ")");
- return this.getLSTMLayer().getOutputType(-1, inputType[0]);
+ InputPreProcessor preProcessor = getInputPreprocessor(inputType);
+ if (preProcessor != null)
+ return preProcessor.getOutputType(inputType[0]);
+ else
+ return this.getLSTMLayer().getOutputType(-1, inputType[0]);
}
/**
@@ -176,6 +182,28 @@ public int getNumParams() {
return kerasMajorVersion == 2 ? NUM_TRAINABLE_PARAMS_KERAS_2 : NUM_TRAINABLE_PARAMS;
}
+ /**
+ * Gets appropriate DL4J InputPreProcessor for given InputTypes.
+ *
+ * @param inputType Array of InputTypes
+ * @return DL4J InputPreProcessor
+ * @throws InvalidKerasConfigurationException Invalid Keras configuration exception
+ * @see org.deeplearning4j.nn.conf.InputPreProcessor
+ */
+ @Override
+ public InputPreProcessor getInputPreprocessor(InputType... inputType) throws InvalidKerasConfigurationException {
+ if (inputType.length > 1)
+ throw new InvalidKerasConfigurationException(
+ "Keras LSTM layer accepts only one input (received " + inputType.length + ")");
+ InputPreProcessor preprocessor = null;
+ if (inputType[0] instanceof InputType.InputTypeFeedForward) {
+ preprocessor = new FeedForwardToRnnPreProcessor();
+ }
+ return preprocessor;
+ }
+
/**
* Set weights for layer.
*
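
The getInputPreprocessor override added above means an imported LSTM fed by a 2D (feed-forward) input automatically receives a FeedForwardToRnnPreProcessor, which reshapes [minibatch, size] activations into the [minibatch, size, timeSeriesLength] layout RNN layers expect. The equivalent manual wiring in a hand-built DL4J config, as a sketch with illustrative layer sizes:

    import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.conf.layers.LSTM;
    import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .list()
            .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
            .layer(1, new LSTM.Builder().nIn(10).nOut(5).build())
            .inputPreProcessor(1, new FeedForwardToRnnPreProcessor()) // 2d dense -> 3d LSTM
            .build();
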
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/trainedmodels/TrainedModelHelper.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/trainedmodels/TrainedModelHelper.java
index 9e0aeeab3c79..6bf6742fb05e 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/trainedmodels/TrainedModelHelper.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/trainedmodels/TrainedModelHelper.java
@@ -2,8 +2,8 @@
import org.apache.commons.io.FileUtils;
import org.deeplearning4j.nn.graph.ComputationGraph;
-import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
+import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java
index 490e8c9064cf..64a10c3f2808 100644
--- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java
+++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java
@@ -23,7 +23,7 @@
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
-import org.deeplearning4j.nn.modelimport.keras.layers.*;
+import org.deeplearning4j.nn.modelimport.keras.layers.KerasInput;
import org.deeplearning4j.nn.modelimport.keras.layers.advanced.activations.KerasLeakyReLU;
import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.*;
import org.deeplearning4j.nn.modelimport.keras.layers.core.*;
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java
index 8c36c7d5c451..c53365487c54 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java
@@ -38,15 +38,15 @@ public class Keras1ModelConfigurationTest {
private ClassLoader classLoader = getClass().getClassLoader();
-// @Test
-// public void imdbLstmTfSequentialConfigTest() throws Exception {
-// runSequentialConfigTest("configs/keras1/imdb_lstm_tf_keras_1_config.json");
-// }
-//
-// @Test
-// public void imdbLstmThSequentialConfigTest() throws Exception {
-// runSequentialConfigTest("configs/keras1/imdb_lstm_th_keras_1_config.json");
-// }
+ @Test
+ public void imdbLstmTfSequentialConfigTest() throws Exception {
+ runSequentialConfigTest("configs/keras1/imdb_lstm_tf_keras_1_config.json", true);
+ }
+
+ @Test
+ public void imdbLstmThSequentialConfigTest() throws Exception {
+ runSequentialConfigTest("configs/keras1/imdb_lstm_th_keras_1_config.json", true);
+ }
@Test
public void mnistMlpTfSequentialConfigTest() throws Exception {
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java
index a59de366c2aa..8e3c4eb8d560 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java
@@ -38,15 +38,15 @@ public class Keras2ModelConfigurationTest {
ClassLoader classLoader = getClass().getClassLoader();
-// @Test
-// public void imdbLstmTfSequentialConfigTest() throws Exception {
-// runSequentialConfigTest("configs/keras2/imdb_lstm_tf_keras_2_config.json");
-// }
-//
-// @Test
-// public void imdbLstmThSequentialConfigTest() throws Exception {
-// runSequentialConfigTest("configs/keras2/imdb_lstm_th_keras_2_config.json");
-// }
+ @Test
+ public void imdbLstmTfSequentialConfigTest() throws Exception {
+ runSequentialConfigTest("configs/keras2/imdb_lstm_tf_keras_2_config.json");
+ }
+
+ @Test
+ public void imdbLstmThSequentialConfigTest() throws Exception {
+ runSequentialConfigTest("configs/keras2/imdb_lstm_th_keras_2_config.json");
+ }
@Test
public void mnistMlpTfSequentialConfigTest() throws Exception {
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java
index ea14c4662705..027ee832bb29 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java
@@ -17,9 +17,6 @@
*/
package org.deeplearning4j.nn.modelimport.keras.configurations;
-import java.io.File;
-import java.io.IOException;
-
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
@@ -27,6 +24,9 @@
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Test;
+import java.io.File;
+import java.io.IOException;
+
/**
* Test import of Keras models.
*
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLayerTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLayerTest.java
index 94f27219fb46..bc8f6cb509c8 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLayerTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLayerTest.java
@@ -25,7 +25,6 @@
import org.deeplearning4j.nn.modelimport.keras.layers.custom.KerasLRN;
import org.deeplearning4j.nn.modelimport.keras.layers.custom.KerasPoolHelper;
import org.deeplearning4j.util.ModelSerializer;
-import org.junit.Test;
import java.io.File;
import java.net.URL;
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java
index a6a2503085cc..ae83c8f30650 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java
@@ -18,6 +18,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.Convolution1DLayer;
import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
@@ -95,7 +96,7 @@ public void buildAtrousConvolution1DLayer(KerasLayerConfiguration conf, Integer
assertEquals(INIT_DL4J, layer.getWeightInit());
assertEquals(L1_REGULARIZATION, layer.getL1(), 0.0);
assertEquals(L2_REGULARIZATION, layer.getL2(), 0.0);
- assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0);
+ assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout());
assertEquals(KERNEL_SIZE[0], layer.getKernelSize()[0]);
assertEquals(STRIDE[0], layer.getStride()[0]);
assertEquals(N_OUT, layer.getNOut());
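These assertion updates (repeated in the convolution, dense, dropout, and LSTM tests below) follow the refactor of layer dropout from a primitive dropOut double to the IDropout interface. A minimal sketch of the new configuration path, assuming the dropOut(double) builder convenience wraps the value in new Dropout(p) and that Dropout implements value-based equals, which is what the rewritten assertions rely on:

```java
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.DenseLayer;

public class DropoutEqualitySketch {
    public static void main(String[] args) {
        DenseLayer layer = new DenseLayer.Builder()
                .nIn(10).nOut(5)
                .dropOut(0.5) // convenience overload; assumed to store new Dropout(0.5)
                .build();
        // Value-based equality between IDropout instances
        System.out.println(new Dropout(0.5).equals(layer.getIDropout())); // true
    }
}
```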
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java
index afe3732a9f69..b6026eae9c7d 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java
@@ -18,6 +18,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
@@ -113,7 +114,7 @@ public void buildAtrousConvolution2DLayer(KerasLayerConfiguration conf, Integer
assertEquals(INIT_DL4J, layer.getWeightInit());
assertEquals(L1_REGULARIZATION, layer.getL1(), 0.0);
assertEquals(L2_REGULARIZATION, layer.getL2(), 0.0);
- assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0);
+ assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout());
assertArrayEquals(KERNEL_SIZE, layer.getKernelSize());
assertArrayEquals(STRIDE, layer.getStride());
assertEquals(N_OUT, layer.getNOut());
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java
index c2d5378a6cd6..58d67803569e 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java
@@ -18,6 +18,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.Convolution1DLayer;
import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
@@ -102,7 +103,7 @@ public void buildConvolution1DLayer(KerasLayerConfiguration conf, Integer kerasV
assertEquals(INIT_DL4J, layer.getWeightInit());
assertEquals(L1_REGULARIZATION, layer.getL1(), 0.0);
assertEquals(L2_REGULARIZATION, layer.getL2(), 0.0);
- assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0);
+ assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout());
assertEquals(KERNEL_SIZE[0], layer.getKernelSize()[0]);
assertEquals(STRIDE[0], layer.getStride()[0]);
assertEquals(N_OUT, layer.getNOut());
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java
index 3c9b7cec9da3..357a2133d1b3 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java
@@ -18,6 +18,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
@@ -122,7 +123,7 @@ public void buildConvolution2DLayer(KerasLayerConfiguration conf, Integer kerasV
assertEquals(INIT_DL4J, layer.getWeightInit());
assertEquals(L1_REGULARIZATION, layer.getL1(), 0.0);
assertEquals(L2_REGULARIZATION, layer.getL2(), 0.0);
- assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0);
+ assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout());
assertArrayEquals(KERNEL_SIZE, layer.getKernelSize());
assertArrayEquals(STRIDE, layer.getStride());
assertEquals(N_OUT, layer.getNOut());
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java
new file mode 100644
index 000000000000..2fa9b1e07a39
--- /dev/null
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java
@@ -0,0 +1,65 @@
+/*-
+ *
+ * * Copyright 2017 Skymind,Inc.
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
+
+import org.deeplearning4j.nn.conf.layers.Upsampling1D;
+import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
+import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
+import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
+import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasUpsampling1D;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author Max Pumperla
+ */
+public class KerasUpsampling1DTest {
+
+ private final String LAYER_NAME = "upsampling_1D_layer";
+ private int size = 4;
+
+ private Integer keras1 = 1;
+ private Integer keras2 = 2;
+ private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();
+ private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration();
+
+ @Test
+ public void testUpsampling1DLayer() throws Exception {
+ buildUpsampling1DLayer(conf1, keras1);
+ buildUpsampling1DLayer(conf2, keras2);
+ }
+
+ public void buildUpsampling1DLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws Exception {
+ Map<String, Object> layerConfig = new HashMap<>();
+ layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_UPSAMPLING_1D());
+ Map<String, Object> config = new HashMap<>();
+ config.put(conf.getLAYER_FIELD_UPSAMPLING_1D_SIZE(), size);
+ config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME);
+ layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config);
+ layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion);
+
+ Upsampling1D layer = new KerasUpsampling1D(layerConfig).getUpsampling1DLayer();
+ assertEquals(LAYER_NAME, layer.getLayerName());
+ assertEquals(size, layer.getSize());
+ }
+
+}
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java
index ddac2e7e6c5b..2251523c91d6 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java
@@ -55,7 +55,7 @@ public void testUpsampling2DLayer() throws Exception {
public void buildUpsampling2DLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws Exception {
Map<String, Object> layerConfig = new HashMap<>();
- layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_MAX_POOLING_1D());
+ layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_UPSAMPLING_2D());
Map<String, Object> config = new HashMap<>();
List<Integer> sizeList = new ArrayList<>();
sizeList.add(size[0]);
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java
index 8a7a69916eae..7bb3e258b682 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java
@@ -17,6 +17,7 @@
*/
package org.deeplearning4j.nn.modelimport.keras.layers.core;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -85,7 +86,7 @@ void buildDenseLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws
assertEquals(INIT_DL4J, layer.getWeightInit());
assertEquals(L1_REGULARIZATION, layer.getL1(), 0.0);
assertEquals(L2_REGULARIZATION, layer.getL2(), 0.0);
- assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0);
+ assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout());
assertEquals(N_OUT, layer.getNOut());
}
}
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropoutTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropoutTest.java
index 4f55b22db3ab..4e27ef4d32fc 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropoutTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropoutTest.java
@@ -17,6 +17,7 @@
*/
package org.deeplearning4j.nn.modelimport.keras.layers.core;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.DropoutLayer;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -61,7 +62,7 @@ public void buildDropoutLayer(KerasLayerConfiguration conf, Integer kerasVersion
DropoutLayer layer = new KerasDropout(layerConfig).getDropoutLayer();
assertEquals(LAYER_NAME, layer.getLayerName());
- assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0);
+ assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout());
}
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalizationTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalizationTest.java
index fc6d48c87223..f4808c3ce561 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalizationTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalizationTest.java
@@ -22,6 +22,7 @@
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
import org.junit.Test;
+import org.nd4j.linalg.learning.config.Nesterovs;
import java.util.HashMap;
import java.util.Map;
@@ -69,6 +70,6 @@ public void buildBatchNormalizationLayer(KerasLayerConfiguration conf, Integer k
BatchNormalization layer = new KerasBatchNormalization(layerConfig).getBatchNormalizationLayer();
assertEquals(LAYER_NAME, layer.getLayerName());
assertEquals(epsilon, layer.getEps(), 0.0);
- assertEquals(momentum, layer.getMomentum(), 0.0);
+ assertEquals(momentum, ((Nesterovs)layer.getIUpdater()).getMomentum(), 0.0);
}
}
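Here the batch-norm momentum assertion changes for the same reason: momentum is no longer a standalone layer field but lives on the layer's IUpdater. A minimal sketch of the accessor the test now uses, assuming the (learningRate, momentum) Nesterovs constructor:

```java
import org.nd4j.linalg.learning.config.Nesterovs;

public class MomentumSketch {
    public static void main(String[] args) {
        Nesterovs updater = new Nesterovs(0.1, 0.99); // learning rate 0.1, momentum 0.99
        System.out.println(updater.getMomentum());    // 0.99
    }
}
```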
diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java
index b58924e98e3e..61b81fe964c4 100644
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java
+++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java
@@ -17,6 +17,7 @@
*/
package org.deeplearning4j.nn.modelimport.keras.layers.recurrent;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -101,7 +102,7 @@ void buildLstmLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws E
assertEquals(INIT_DL4J, layer.getWeightInit());
assertEquals(L1_REGULARIZATION, layer.getL1(), 0.0);
assertEquals(L2_REGULARIZATION, layer.getL2(), 0.0);
- assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0);
+ assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout());
assertEquals(lstmForgetBiasDouble, layer.getForgetGateBiasInit(), 0.0);
assertEquals(N_OUT, layer.getNOut());
}
diff --git a/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_tf_keras_1_config.json b/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_tf_keras_1_config.json
index 1d7446d8b4ff..b3301b4b1a0d 100644
--- a/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_tf_keras_1_config.json
+++ b/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_tf_keras_1_config.json
@@ -36,8 +36,8 @@
"consume_less": "cpu",
"stateful": false,
"init": "glorot_uniform",
- "inner_init": "orthogonal",
- "dropout_U": 0.2,
+ "inner_init": "glorot_uniform",
+ "dropout_U": 0.0,
"dropout_W": 0.2,
"input_dim": 128,
"return_sequences": false,
diff --git a/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_th_keras_1_config.json b/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_th_keras_1_config.json
index 1d7446d8b4ff..d94a0c7b4bbd 100644
--- a/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_th_keras_1_config.json
+++ b/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_th_keras_1_config.json
@@ -18,7 +18,7 @@
null
],
"W_regularizer": null,
- "dropout": 0.2,
+ "dropout": 0.2,
"output_dim": 128,
"input_length": null
}
@@ -36,8 +36,8 @@
"consume_less": "cpu",
"stateful": false,
"init": "glorot_uniform",
- "inner_init": "orthogonal",
- "dropout_U": 0.2,
+ "inner_init": "glorot_uniform",
+ "dropout_U": 0.0,
"dropout_W": 0.2,
"input_dim": 128,
"return_sequences": false,
diff --git a/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_tf_keras_2_config.json b/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_tf_keras_2_config.json
index 77e2557de409..a897107f81e9 100644
--- a/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_tf_keras_2_config.json
+++ b/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_tf_keras_2_config.json
@@ -35,10 +35,12 @@
"recurrent_activation": "hard_sigmoid",
"trainable": true,
"recurrent_initializer": {
- "class_name": "Orthogonal",
+ "class_name": "VarianceScaling",
"config": {
- "seed": null,
- "gain": 1.0
+ "distribution": "uniform",
+ "scale": 1.0,
+ "seed": null,
+ "mode": "fan_avg"
}
},
"use_bias": true,
@@ -53,7 +55,7 @@
"units": 128,
"unit_forget_bias": true,
"activity_regularizer": null,
- "recurrent_dropout": 0.2,
+ "recurrent_dropout": 0.0,
"kernel_initializer": {
"class_name": "VarianceScaling",
"config": {
diff --git a/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_th_keras_2_config.json b/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_th_keras_2_config.json
index d79e70c4ea2e..0ecc5c8aefc9 100644
--- a/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_th_keras_2_config.json
+++ b/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_th_keras_2_config.json
@@ -35,10 +35,12 @@
"recurrent_activation": "hard_sigmoid",
"trainable": true,
"recurrent_initializer": {
- "class_name": "Orthogonal",
+ "class_name": "VarianceScaling",
"config": {
- "seed": null,
- "gain": 1.0
+ "distribution": "uniform",
+ "scale": 1.0,
+ "seed": null,
+ "mode": "fan_avg"
}
},
"use_bias": true,
@@ -53,7 +55,7 @@
"units": 128,
"unit_forget_bias": true,
"activity_regularizer": null,
- "recurrent_dropout": 0.2,
+ "recurrent_dropout": 0.0,
"kernel_initializer": {
"class_name": "VarianceScaling",
"config": {
diff --git a/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/vptree/VpTreeNodeTest.java b/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/vptree/VpTreeNodeTest.java
index 3667812b0127..275dfc8292ef 100644
--- a/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/vptree/VpTreeNodeTest.java
+++ b/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/vptree/VpTreeNodeTest.java
@@ -18,13 +18,12 @@
package org.deeplearning4j.clustering.vptree;
-import com.google.common.util.concurrent.AtomicDouble;
-import org.nd4j.linalg.primitives.Counter;
import org.deeplearning4j.clustering.sptree.DataPoint;
import org.junit.Test;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.exception.ND4JIllegalStateException;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Counter;
import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/BinarizeTreeTransformer.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/BinarizeTreeTransformer.java
index 1e67d20a8bc9..3f4986a9a6a8 100755
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/BinarizeTreeTransformer.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/BinarizeTreeTransformer.java
@@ -19,9 +19,9 @@
package org.deeplearning4j.text.corpora.treeparser;
import org.apache.commons.lang3.StringUtils;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.layers.feedforward.autoencoder.recursive.Tree;
import org.deeplearning4j.text.corpora.treeparser.transformer.TreeTransformer;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeFactory.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeFactory.java
index 29fd9fd8ab17..9e05d606a726 100755
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeFactory.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeFactory.java
@@ -25,9 +25,9 @@
import org.cleartk.syntax.constituent.type.TreebankNode;
import org.cleartk.syntax.constituent.type.TreebankNodeUtil;
import org.cleartk.token.type.Token;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.layers.feedforward.autoencoder.recursive.Tree;
import org.deeplearning4j.util.MultiDimensionalMap;
+import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.Arrays;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeParser.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeParser.java
index 1cb92c93187a..f07f4b009174 100755
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeParser.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeParser.java
@@ -29,7 +29,6 @@
import org.cleartk.token.type.Sentence;
import org.cleartk.token.type.Token;
import org.cleartk.util.ParamUtil;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.layers.feedforward.autoencoder.recursive.Tree;
import org.deeplearning4j.text.annotator.PoStagger;
import org.deeplearning4j.text.annotator.SentenceAnnotator;
@@ -41,6 +40,7 @@
import org.deeplearning4j.text.tokenization.tokenizerfactory.UimaTokenizerFactory;
import org.deeplearning4j.util.MultiDimensionalMap;
import org.deeplearning4j.util.SetUtils;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/test/java/org/deeplearning4j/util/ContextLabelTest.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/test/java/org/deeplearning4j/util/ContextLabelTest.java
index f8ce6a2c6928..a353d7ad5c7d 100755
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/test/java/org/deeplearning4j/util/ContextLabelTest.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/test/java/org/deeplearning4j/util/ContextLabelTest.java
@@ -18,12 +18,12 @@
package org.deeplearning4j.util;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.text.movingwindow.ContextLabelRetriever;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
import org.deeplearning4j.text.tokenization.tokenizerfactory.UimaTokenizerFactory;
import org.junit.Before;
import org.junit.Test;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/CnnSentenceDataSetIterator.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/CnnSentenceDataSetIterator.java
index 412f353a9348..a4e655a11c8d 100644
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/CnnSentenceDataSetIterator.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/CnnSentenceDataSetIterator.java
@@ -2,7 +2,6 @@
import lombok.AllArgsConstructor;
import lombok.NonNull;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.iterator.provider.LabelAwareConverter;
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors;
import org.deeplearning4j.text.documentiterator.LabelAwareDocumentIterator;
@@ -20,6 +19,7 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.primitives.Pair;
import java.util.*;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/CollectionLabeledSentenceProvider.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/CollectionLabeledSentenceProvider.java
index b5809d6c35f6..26c25b3bdf0d 100644
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/CollectionLabeledSentenceProvider.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/CollectionLabeledSentenceProvider.java
@@ -2,8 +2,8 @@
import lombok.NonNull;
import org.datavec.api.util.RandomUtils;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.iterator.LabeledSentenceProvider;
+import org.nd4j.linalg.primitives.Pair;
import java.util.*;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/FileLabeledSentenceProvider.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/FileLabeledSentenceProvider.java
index 5b565507e036..6f3d5bda64b4 100644
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/FileLabeledSentenceProvider.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/FileLabeledSentenceProvider.java
@@ -3,9 +3,9 @@
import lombok.NonNull;
import org.apache.commons.io.FileUtils;
import org.datavec.api.util.RandomUtils;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.iterator.LabeledSentenceProvider;
import org.nd4j.linalg.collection.CompactHeapStringList;
+import org.nd4j.linalg.primitives.Pair;
import java.io.File;
import java.io.IOException;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/LabelAwareConverter.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/LabelAwareConverter.java
index 503f6bb07da1..2a1f0b2f54d7 100644
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/LabelAwareConverter.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/LabelAwareConverter.java
@@ -1,10 +1,10 @@
package org.deeplearning4j.iterator.provider;
import lombok.NonNull;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.iterator.LabeledSentenceProvider;
import org.deeplearning4j.text.documentiterator.LabelAwareIterator;
import org.deeplearning4j.text.documentiterator.LabelledDocument;
+import org.nd4j.linalg.primitives.Pair;
import java.util.List;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/learning/impl/elements/GloVe.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/learning/impl/elements/GloVe.java
index 4192ab53a3e6..b17f7777862c 100644
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/learning/impl/elements/GloVe.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/learning/impl/elements/GloVe.java
@@ -1,8 +1,6 @@
package org.deeplearning4j.models.embeddings.learning.impl.elements;
import lombok.NonNull;
-import org.nd4j.linalg.primitives.Counter;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.embeddings.WeightLookupTable;
import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable;
import org.deeplearning4j.models.embeddings.learning.ElementsLearningAlgorithm;
@@ -15,6 +13,8 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.legacy.AdaGrad;
+import org.nd4j.linalg.primitives.Counter;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializer.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializer.java
index ed1b230bdbd1..8bcf200040c0 100755
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializer.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializer.java
@@ -27,7 +27,6 @@
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.io.output.CloseShieldOutputStream;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.models.embeddings.WeightLookupTable;
import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable;
@@ -59,6 +58,7 @@
import org.nd4j.linalg.exception.ND4JIllegalStateException;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.shade.jackson.databind.DeserializationFeature;
import org.nd4j.shade.jackson.databind.MapperFeature;
import org.nd4j.shade.jackson.databind.ObjectMapper;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/BasicModelUtils.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/BasicModelUtils.java
index 6d729ce5d88c..dc2354c67642 100644
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/BasicModelUtils.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/BasicModelUtils.java
@@ -5,7 +5,6 @@
import lombok.Data;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
-import org.nd4j.linalg.primitives.Counter;
import org.deeplearning4j.models.embeddings.WeightLookupTable;
import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable;
import org.deeplearning4j.models.embeddings.reader.ModelUtils;
@@ -16,6 +15,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Counter;
import java.util.*;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/FlatModelUtils.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/FlatModelUtils.java
index c3bac8891144..ae9a339613c4 100644
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/FlatModelUtils.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/FlatModelUtils.java
@@ -1,9 +1,9 @@
package org.deeplearning4j.models.embeddings.reader.impl;
-import org.nd4j.linalg.primitives.Counter;
import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Counter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/AbstractCoOccurrences.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/AbstractCoOccurrences.java
index 73bf2492a664..b04511f4be18 100644
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/AbstractCoOccurrences.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/AbstractCoOccurrences.java
@@ -1,7 +1,6 @@
package org.deeplearning4j.models.glove;
import lombok.NonNull;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.glove.count.*;
import org.deeplearning4j.models.sequencevectors.interfaces.SequenceIterator;
import org.deeplearning4j.models.sequencevectors.iterators.FilteredSequenceIterator;
@@ -14,6 +13,7 @@
import org.deeplearning4j.text.sentenceiterator.SentenceIterator;
import org.deeplearning4j.text.sentenceiterator.SynchronizedSentenceIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/count/CountMap.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/count/CountMap.java
index 963714d25573..a723a30f3515 100644
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/count/CountMap.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/count/CountMap.java
@@ -1,8 +1,8 @@
package org.deeplearning4j.models.glove.count;
import com.google.common.util.concurrent.AtomicDouble;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Iterator;
import java.util.Map;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/paragraphvectors/ParagraphVectors.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/paragraphvectors/ParagraphVectors.java
index 764c6917eb84..3dd0674f7809 100644
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/paragraphvectors/ParagraphVectors.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/paragraphvectors/ParagraphVectors.java
@@ -4,8 +4,6 @@
import lombok.Getter;
import lombok.NonNull;
import lombok.Setter;
-import org.nd4j.linalg.primitives.Counter;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.embeddings.WeightLookupTable;
import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable;
import org.deeplearning4j.models.embeddings.learning.ElementsLearningAlgorithm;
@@ -34,6 +32,8 @@
import org.nd4j.linalg.exception.ND4JIllegalStateException;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Counter;
+import org.nd4j.linalg.primitives.Pair;
import java.util.*;
import java.util.concurrent.*;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/word2vec/wordstore/inmemory/InMemoryLookupCache.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/word2vec/wordstore/inmemory/InMemoryLookupCache.java
index 9722214d8b60..8774cb4e3e22 100755
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/word2vec/wordstore/inmemory/InMemoryLookupCache.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/word2vec/wordstore/inmemory/InMemoryLookupCache.java
@@ -18,12 +18,12 @@
package org.deeplearning4j.models.word2vec.wordstore.inmemory;
-import org.nd4j.linalg.primitives.Counter;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.text.movingwindow.Util;
import org.deeplearning4j.util.Index;
import org.deeplearning4j.util.SerializationUtils;
+import org.nd4j.linalg.primitives.Counter;
import java.io.File;
import java.io.InputStream;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/invertedindex/InvertedIndex.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/invertedindex/InvertedIndex.java
index 69015950ee6e..714bde9d55c6 100644
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/invertedindex/InvertedIndex.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/invertedindex/InvertedIndex.java
@@ -19,8 +19,8 @@
package org.deeplearning4j.text.invertedindex;
import com.google.common.base.Function;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement;
+import org.nd4j.linalg.primitives.Pair;
import java.io.Serializable;
import java.util.Collection;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/ContextLabelRetriever.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/ContextLabelRetriever.java
index 7255ec87cb72..bd65b87bfa44 100755
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/ContextLabelRetriever.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/ContextLabelRetriever.java
@@ -18,11 +18,11 @@
package org.deeplearning4j.text.movingwindow;
-import org.deeplearning4j.util.StringUtils;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.text.tokenization.tokenizer.Tokenizer;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
import org.deeplearning4j.util.MultiDimensionalMap;
+import org.deeplearning4j.util.StringUtils;
+import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.List;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/Util.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/Util.java
index 924051a47fa0..2c691e019e05 100755
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/Util.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/Util.java
@@ -22,7 +22,6 @@
import org.nd4j.linalg.primitives.CounterMap;
import java.util.List;
-import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/glove/AbstractCoOccurrencesTest.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/glove/AbstractCoOccurrencesTest.java
index 5ac40a9d29f7..e18182bb2bff 100644
--- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/glove/AbstractCoOccurrencesTest.java
+++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/glove/AbstractCoOccurrencesTest.java
@@ -1,7 +1,6 @@
package org.deeplearning4j.models.glove;
import org.datavec.api.util.ClassPathResource;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.sequencevectors.iterators.AbstractSequenceIterator;
import org.deeplearning4j.models.sequencevectors.transformers.impl.SentenceTransformer;
import org.deeplearning4j.models.word2vec.VocabWord;
@@ -13,6 +12,7 @@
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
import org.junit.Before;
import org.junit.Test;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIterator.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIterator.java
index b6b4cbd12bc5..54677169a5d2 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIterator.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIterator.java
@@ -1,12 +1,12 @@
package org.deeplearning4j.datasets.iterator;
import lombok.NonNull;
-import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.DataSetPreProcessor;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.Iterator;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/INDArrayDataSetIterator.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/INDArrayDataSetIterator.java
index 2d4414e3cc7a..e4bbefafe6cf 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/INDArrayDataSetIterator.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/INDArrayDataSetIterator.java
@@ -1,8 +1,8 @@
package org.deeplearning4j.datasets.iterator;
import lombok.NonNull;
-import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/**
* @author raver119@gmail.com
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/BaseEvaluation.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/BaseEvaluation.java
index c7c7fbd2921c..c461a30d2092 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/BaseEvaluation.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/BaseEvaluation.java
@@ -2,10 +2,10 @@
import lombok.EqualsAndHashCode;
import lombok.Getter;
+import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.primitives.AtomicBoolean;
import org.nd4j.linalg.primitives.AtomicDouble;
import org.nd4j.linalg.primitives.Pair;
-import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.primitives.serde.JsonDeserializerAtomicBoolean;
import org.nd4j.linalg.primitives.serde.JsonDeserializerAtomicDouble;
import org.nd4j.linalg.primitives.serde.JsonSerializerAtomicBoolean;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/Evaluation.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/Evaluation.java
index 414db173f4cf..69beee2c56e1 100755
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/Evaluation.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/Evaluation.java
@@ -22,8 +22,6 @@
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
-import org.nd4j.linalg.primitives.Counter;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.eval.meta.Prediction;
import org.deeplearning4j.eval.serde.ConfusionMatrixDeserializer;
import org.deeplearning4j.eval.serde.ConfusionMatrixSerializer;
@@ -37,6 +35,8 @@
import org.nd4j.linalg.indexing.conditions.Conditions;
import org.nd4j.linalg.lossfunctions.serde.RowVectorDeserializer;
import org.nd4j.linalg.lossfunctions.serde.RowVectorSerializer;
+import org.nd4j.linalg.primitives.Counter;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties;
import org.nd4j.shade.jackson.databind.annotation.JsonDeserialize;
import org.nd4j.shade.jackson.databind.annotation.JsonSerialize;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/EvaluationUtils.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/EvaluationUtils.java
index 1ba2c13e727c..d4cae93bd7e3 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/EvaluationUtils.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/EvaluationUtils.java
@@ -1,9 +1,9 @@
package org.deeplearning4j.eval;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.util.TimeSeriesUtils;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/ROC.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/ROC.java
index c848c2468ea1..7f46be8b5954 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/ROC.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/ROC.java
@@ -2,7 +2,6 @@
import lombok.*;
import org.apache.commons.lang3.ArrayUtils;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.eval.curves.PrecisionRecallCurve;
import org.deeplearning4j.eval.curves.RocCurve;
import org.deeplearning4j.eval.serde.ROCSerializer;
@@ -15,6 +14,7 @@
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.indexing.conditions.Condition;
import org.nd4j.linalg.indexing.conditions.Conditions;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties;
import org.nd4j.shade.jackson.annotation.JsonTypeInfo;
import org.nd4j.shade.jackson.databind.annotation.JsonSerialize;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java
index 5ac6ddb7115c..a0fe0778c2c6 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java
@@ -31,6 +31,7 @@
import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.ILossFunction;
import org.nd4j.linalg.lossfunctions.impl.LossMCXENT;
+import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.Arrays;
@@ -133,7 +134,7 @@ public static boolean checkGradients(MultiLayerNetwork mln, double epsilon, doub
IUpdater u = bl.getIUpdater();
if (u instanceof Sgd) {
//Must have LR of 1.0
- double lr = bl.getLearningRate();
+ double lr = ((Sgd) u).getLearningRate();
if (lr != 1.0) {
throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer "
+ layerCount + "; got " + u + " with lr=" + lr + " for layer \""
@@ -155,10 +156,9 @@ public static boolean checkGradients(MultiLayerNetwork mln, double epsilon, doub
}
}
- double dropout = n.getLayer().getDropOut();
- if (dropout != 0.0) {
- throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = "
- + dropout + " for layer " + layerCount);
+ if (n.getLayer().getIDropout() != null) {
+ throw new IllegalStateException("Must have no dropout for gradient checks - got dropout = "
+ + n.getLayer().getIDropout() + " for layer " + layerCount);
}
}
@@ -175,7 +175,7 @@ public static boolean checkGradients(MultiLayerNetwork mln, double epsilon, doub
Pair<Gradient, Double> gradAndScore = mln.gradientAndScore();
Updater updater = UpdaterCreator.getUpdater(mln);
- updater.update(mln, gradAndScore.getFirst(), 0, mln.batchSize());
+ updater.update(mln, gradAndScore.getFirst(), 0, 0, mln.batchSize());
INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup(); //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
INDArray originalParams = mln.params().dup(); //need dup: params are a *view* of full parameters
@@ -313,7 +313,7 @@ public static boolean checkGradients(ComputationGraph graph, double epsilon, dou
IUpdater u = bl.getIUpdater();
if (u instanceof Sgd) {
//Must have LR of 1.0
- double lr = bl.getLearningRate();
+ double lr = ((Sgd) u).getLearningRate();
if (lr != 1.0) {
throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer "
+ layerCount + "; got " + u + " with lr=" + lr + " for layer \""
@@ -335,10 +335,9 @@ public static boolean checkGradients(ComputationGraph graph, double epsilon, dou
}
}
- double dropout = lv.getLayerConf().getLayer().getDropOut();
- if (dropout != 0.0) {
- throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = "
- + dropout + " for layer " + layerCount);
+ if (lv.getLayerConf().getLayer().getIDropout() != null) {
+ throw new IllegalStateException("Must have no dropout for gradient checks - got dropout = "
+ + lv.getLayerConf().getLayer().getIDropout() + " for layer " + layerCount);
}
}
@@ -358,7 +357,7 @@ public static boolean checkGradients(ComputationGraph graph, double epsilon, dou
Pair<Gradient, Double> gradAndScore = graph.gradientAndScore();
ComputationGraphUpdater updater = new ComputationGraphUpdater(graph);
- updater.update(gradAndScore.getFirst(), 0, graph.batchSize());
+ updater.update(gradAndScore.getFirst(), 0, 0, graph.batchSize());
INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup(); //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
INDArray originalParams = graph.params().dup(); //need dup: params are a *view* of full parameters
@@ -474,7 +473,7 @@ public static boolean checkGradientsPretrainLayer(Layer layer, double epsilon, d
Pair<Gradient, Double> gradAndScore = layer.gradientAndScore();
Updater updater = UpdaterCreator.getUpdater(layer);
- updater.update(layer, gradAndScore.getFirst(), 0, layer.batchSize());
+ updater.update(layer, gradAndScore.getFirst(), 0, 0, layer.batchSize());
INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup(); //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
INDArray originalParams = layer.params().dup(); //need dup: params are a *view* of full parameters
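Summarizing the updated preconditions in GradientCheckUtil: every layer must use an Sgd updater with learning rate exactly 1.0, and getIDropout() must be null. A minimal sketch of a network configuration that satisfies those checks, assuming the IUpdater-based builder overload from this refactor:

```java
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class GradCheckConfigSketch {
    public static MultiLayerNetwork buildCheckableNet() {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .updater(new Sgd(1.0)) // Sgd with lr = 1.0, as the check requires
                // no dropOut(...) anywhere, so getIDropout() stays null
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3)
                        .activation(Activation.TANH).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(3).nOut(2).build())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        return net; // gradient checks also generally require double precision in Nd4j
    }
}
```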
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java
index 494afa7ba6ce..0ce811102823 100755
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java
@@ -19,11 +19,11 @@
package org.deeplearning4j.nn.api;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.optimize.api.IterationListener;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.io.Serializable;
import java.util.Collection;
@@ -77,45 +77,6 @@ enum TrainingMode {
*/
Type type();
- /**
- * Calculate error with respect to the
- * current layer.
- *
- * This gradient will contain the error signal
- * @param input the gradient for the forward layer
- * If this is the final layer, it will start
- * with the error from the output.
- * This is on the user to initialize.
- * @return the gradient wrt the parameters
- * on the current layer
- * @deprecated As of 0.7.3 - Feb 2017. No longer used.
- */
- @Deprecated
- Gradient error(INDArray input);
-
-
-
- /**
- * Take the derivative of the given input
- * based on the activation
- * @param input the input to take the derivative of
- * @return the derivative of the action
- * @deprecated As of 0.7.3 - Feb 2017. No longer used.
- */
- @Deprecated
- INDArray derivativeActivation(INDArray input);
-
-
- /**
- * Calculate the gradient
- * @param layerError the layer error
- * @param indArray
- * @return the gradient
- * @deprecated As of 0.7.3 - Feb 2017. No longer used.
- */
- @Deprecated
- Gradient calcGradient(Gradient layerError, INDArray indArray);
-
/**Calculate the gradient relative to the error in the next layer
* @param epsilon w^(L+1)*delta^(L+1). Or, equiv: dC/da, i.e., (dC/dz)*(dz/da) = dC/da, where C
@@ -126,26 +87,6 @@ enum TrainingMode {
*/
Pair<Gradient, INDArray> backpropGradient(INDArray epsilon);
-
- /**
- * Parameter averaging
- * @param layer the layer to merge
- * @param batchSize the batch size to merge on
- * @deprecated As of 0.7.3 - Feb 2017. No longer used. Merging (for parameter averaging) done via alternative means
- */
- @Deprecated
- void merge(Layer layer, int batchSize);
-
-
- /**
- * Calculate the mean representation
- * for the activation for this layer
- * @return the activation mean for this layer
- * @deprecated As of 0.7.3 - Feb 2017. No longer used.
- */
- @Deprecated
- INDArray activationMean();
-
/**
* Raw activations
* @param x the input to transform
@@ -230,6 +171,7 @@ enum TrainingMode {
*
* @return the transposed layer
*/
+ @Deprecated
Layer transpose();
/**
@@ -265,6 +207,26 @@ enum TrainingMode {
*/
int getIndex();
+ /**
+ * @return The current iteration count (number of parameter updates) for the layer/network
+ */
+ int getIterationCount();
+
+ /**
+ * @return The current epoch count (number of training epochs passed) for the layer/network
+ */
+ int getEpochCount();
+
+ /**
+ * Set the current iteration count (number of parameter updates) for the layer/network
+ */
+ void setIterationCount(int iterationCount);
+
+ /**
+ * Set the current epoch count (number of epochs passed ) for the layer/network
+ */
+ void setEpochCount(int epochCount);
+
/**
* Get the layer input.
*/
@@ -300,6 +262,9 @@ enum TrainingMode {
boolean isPretrainLayer();
+ void clearNoiseWeightParams();
+
+
/**
* Feed forward the input mask array, setting in in the layer as appropriate. This allows different layers to
* handle masks differently - for example, bidirectional RNNs and normal RNNs operate differently with masks (the
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Model.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Model.java
index 954bf2a72d29..b8f64ce3926a 100755
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Model.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Model.java
@@ -18,12 +18,12 @@
package org.deeplearning4j.nn.api;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.optimize.api.ConvexOptimizer;
import org.deeplearning4j.optimize.api.IterationListener;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Collection;
import java.util.Map;
@@ -144,14 +144,6 @@ public interface Model {
*/
void setBackpropGradientsViewArray(INDArray gradients);
- /**
- * Update learningRate using for this model.
- * Use the learningRateScoreBasedDecay to adapt the score
- * if the Eps termination condition is met
- */
- void applyLearningRateScoreDecay();
-
-
/**
* Fit the model to the given data
* @param data the data to fit the model to
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/NeuralNetworkPrototype.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/NeuralNetworkPrototype.java
deleted file mode 100644
index 4cfc90796b4d..000000000000
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/NeuralNetworkPrototype.java
+++ /dev/null
@@ -1,70 +0,0 @@
-package org.deeplearning4j.nn.api;
-
-import org.nd4j.linalg.api.ndarray.INDArray;
-import org.nd4j.linalg.dataset.api.DataSet;
-import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
-
-import java.util.Map;
-
-/**
- *
- * @author Alex Black
- * @author raver119@gmail.com
- */
-public interface NeuralNetworkPrototype {
- /*
- Model params section
- */
- INDArray getParams();
-
- Updater getUpdater();
-
- double getScore();
-
- T getConfiguration();
-
- /*
- Layers section
- */
- // however, we can replicate to actual structure
- Layer[] getLayers();
-
-
- /*
- Fitting section
- */
- // we should have unified dataset here
- void fit(DataSet dataSet);
-
- // should be unified iterator too
- void fit(DataSetIterator iterator);
-
- // same, iterator unification would be nice to see here
- void pretrain(DataSetIterator iterator);
-
-
- /*
- Output section
- */
- Map activations(INDArray input);
-
- INDArray output(INDArray input);
-
- INDArray[] output(INDArray... input);
-
-
- /*
- RNN section
- */
- void rnnClearPreviousState();
-
- Map> rnnGetPreviousStates();
-
- void rnnTimeStep(INDArray... input);
-
-
- /*
- Evaluation section
- */
- // why exactly we have Evaluation class AND evaluation code in MLN/CG at the same time?
-}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java
index a4edac1632ea..dc7b3c1df63d 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java
@@ -33,5 +33,5 @@ public interface Updater extends Serializable {
* @param gradient
* @param iteration
*/
- void update(Layer layer, Gradient gradient, int iteration, int miniBatchSize);
+ void update(Layer layer, Gradient gradient, int iteration, int epoch, int miniBatchSize);
}
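For reference, a minimal call-site sketch of the widened Updater signature (mirroring the GradientCheckUtil changes earlier in this diff; layer, gradAndScore and updater are assumed to be in scope, as in that utility):

    //Before this change: updater.update(layer, gradient, iteration, miniBatchSize)
    //After: the current epoch is passed through as well, so that epoch-based
    //schedules (learning rate, dropout, etc.) can be resolved inside the updater.
    int iteration = 0;
    int epoch = 0;
    updater.update(layer, gradAndScore.getFirst(), iteration, epoch, layer.batchSize());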
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java
index 11d0fc5eb8cb..0b515e8fe004 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java
@@ -1,11 +1,9 @@
package org.deeplearning4j.nn.api.layers;
-import lombok.EqualsAndHashCode;
import org.deeplearning4j.nn.api.Layer;
import org.nd4j.shade.jackson.annotation.JsonTypeInfo;
import java.io.Serializable;
-import java.util.List;
import java.util.Set;
@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class")
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java
index 33310ef6c049..4a5426e29742 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java
@@ -16,10 +16,10 @@
package org.deeplearning4j.nn.api.layers;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Map;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java
index 4864e8076917..b714eb67cf70 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java
@@ -639,6 +639,19 @@ public GraphBuilder addLayer(String layerName, Layer layer, String... layerInput
return addLayer(layerName, layer, null, layerInputs);
}
+ /**
+ * Add a layer, with no {@link InputPreProcessor}, with the specified name and specified inputs.
+ *
+ * @param layerName Name/label of the layer to add
+ * @param layer The layer configuration
+ * @param layerInputs Inputs to this layer (must be 1 or more). Inputs may be other layers, GraphVertex objects,
+ * or a combination of the two.
+ * @see #addLayer(String, Layer, InputPreProcessor, String...)
+ */
+ public GraphBuilder layer(String layerName, Layer layer, String... layerInputs) {
+ return addLayer(layerName, layer, null, layerInputs);
+ }
+
/**
* Add a layer and an {@link InputPreProcessor}, with the specified name and specified inputs.
*
@@ -657,6 +670,20 @@ public GraphBuilder addLayer(String layerName, Layer layer, InputPreProcessor pr
return this;
}
+ /**
+ * Add a layer and an {@link InputPreProcessor}, with the specified name and specified inputs.
+ *
+ * @param layerName Name/label of the layer to add
+ * @param layer The layer configuration
+ * @param preProcessor The InputPreProcessor to use with this layer.
+ * @param layerInputs Inputs to this layer (must be 1 or more). Inputs may be other layers, GraphVertex objects,
+ * or a combination of the two.
+ */
+ public GraphBuilder layer(String layerName, Layer layer, InputPreProcessor preProcessor,
+ String... layerInputs) {
+ return addLayer(layerName, layer, preProcessor, layerInputs);
+ }
+
/**
* Intended for use with the transfer learning API. Users discouraged from employing it directly.
* Removes the specified vertex from the vertices list, it's connections and associated preprocessor
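The two new layer(...) overloads above are straight aliases for the corresponding addLayer(...) methods, allowing a shorter name in builder chains. A hedged usage sketch (layer names and sizes are illustrative, not from this diff):

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .graphBuilder()
            .addInputs("input")
            //Identical to .addLayer("dense1", ..., "input"):
            .layer("dense1", new DenseLayer.Builder().nIn(10).nOut(10).build(), "input")
            .layer("out", new OutputLayer.Builder().nIn(10).nOut(2).build(), "dense1")
            .setOutputs("out")
            .build();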
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/InputPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/InputPreProcessor.java
index 061af838be3c..1647fac39550 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/InputPreProcessor.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/InputPreProcessor.java
@@ -19,11 +19,11 @@
package org.deeplearning4j.nn.conf;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.preprocessor.*;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.shade.jackson.annotation.JsonSubTypes;
import org.nd4j.shade.jackson.annotation.JsonTypeInfo;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java
index 488552a40c29..28b56acde9ae 100755
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java
@@ -26,8 +26,9 @@
import org.apache.commons.lang3.ClassUtils;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
-import org.deeplearning4j.nn.conf.constraint.BaseConstraint;
import org.deeplearning4j.nn.conf.distribution.Distribution;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
+import org.deeplearning4j.nn.conf.dropout.IDropout;
import org.deeplearning4j.nn.conf.graph.GraphVertex;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
@@ -36,13 +37,15 @@
import org.deeplearning4j.nn.conf.serde.ComputationGraphConfigurationDeserializer;
import org.deeplearning4j.nn.conf.serde.MultiLayerConfigurationDeserializer;
import org.deeplearning4j.nn.conf.stepfunctions.StepFunction;
+import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.util.reflections.DL4JSubTypesScanner;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.activations.impl.ActivationSigmoid;
import org.nd4j.linalg.factory.Nd4j;
-import org.nd4j.linalg.learning.config.*;
+import org.nd4j.linalg.learning.config.IUpdater;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.ILossFunction;
import org.nd4j.shade.jackson.databind.*;
import org.nd4j.shade.jackson.databind.deser.BeanDeserializerModifier;
@@ -85,8 +88,6 @@ public class NeuralNetConfiguration implements Serializable, Cloneable {
public static final String CUSTOM_FUNCTIONALITY = "org.deeplearning4j.config.custom.enabled";
protected Layer layer;
- @Deprecated
- protected double leakyreluAlpha;
//batch size: primarily used for conv nets. Will be reinforced if set.
protected boolean miniBatch = true;
protected int numIterations;
@@ -97,19 +98,11 @@ public class NeuralNetConfiguration implements Serializable, Cloneable {
//gradient keys used for ensuring order when getting and setting the gradient
protected List variables = new ArrayList<>();
//whether to constrain the gradient to unit norm or not
- //adadelta - weight for how much to consider previous history
protected StepFunction stepFunction;
- protected boolean useDropConnect = false;
//minimize or maximize objective
protected boolean minimize = true;
- // Graves LSTM & RNN
- protected Map learningRateByParam = new HashMap<>();
protected Map l1ByParam = new HashMap<>();
protected Map l2ByParam = new HashMap<>();
- protected LearningRatePolicy learningRatePolicy = LearningRatePolicy.None;
- protected double lrPolicyDecayRate;
- protected double lrPolicySteps;
- protected double lrPolicyPower;
protected boolean pretrain;
// this field defines preOutput cache
@@ -143,8 +136,6 @@ public NeuralNetConfiguration clone() {
clone.stepFunction = clone.stepFunction.clone();
if (clone.variables != null)
clone.variables = new ArrayList<>(clone.variables);
- if (clone.learningRateByParam != null)
- clone.learningRateByParam = new HashMap<>(clone.learningRateByParam);
if (clone.l1ByParam != null)
clone.l1ByParam = new HashMap<>(clone.l1ByParam);
if (clone.l2ByParam != null)
@@ -176,7 +167,6 @@ public void clearVariables() {
variables.clear();
l1ByParam.clear();
l2ByParam.clear();
- learningRateByParam.clear();
}
public void resetVariables() {
@@ -186,26 +176,16 @@ public void resetVariables() {
}
public void setLayerParamLR(String variable) {
- double lr = layer.getLearningRateByParam(variable);
double l1 = layer.getL1ByParam(variable);
if (Double.isNaN(l1))
l1 = 0.0; //Not set
double l2 = layer.getL2ByParam(variable);
if (Double.isNaN(l2))
l2 = 0.0; //Not set
- learningRateByParam.put(variable, lr);
l1ByParam.put(variable, l1);
l2ByParam.put(variable, l2);
}
- public double getLearningRateByParam(String variable) {
- return learningRateByParam.get(variable);
- }
-
- public void setLearningRateByParam(String variable, double rate) {
- learningRateByParam.put(variable, rate);
- }
-
public double getL1ByParam(String variable) {
return l1ByParam.get(variable);
}
@@ -592,49 +572,24 @@ public static class Builder implements Cloneable {
protected WeightInit weightInit = WeightInit.XAVIER;
protected double biasInit = 0.0;
protected Distribution dist = null;
- protected double learningRate = 1e-1;
- protected double biasLearningRate = Double.NaN;
- protected Map learningRateSchedule = null;
- protected double lrScoreBasedDecay;
protected double l1 = Double.NaN;
protected double l2 = Double.NaN;
protected double l1Bias = Double.NaN;
protected double l2Bias = Double.NaN;
- protected double dropOut = 0;
- @Deprecated
- protected Updater updater = Updater.SGD;
+ protected IDropout idropOut;
+ protected IWeightNoise weightNoise;
protected IUpdater iUpdater = new Sgd();
- @Deprecated
- protected double momentum = Double.NaN;
- @Deprecated
- protected Map momentumSchedule = null;
- @Deprecated
- protected double epsilon = Double.NaN;
- @Deprecated
- protected double rho = Double.NaN;
- @Deprecated
- protected double rmsDecay = Double.NaN;
- @Deprecated
- protected double adamMeanDecay = Double.NaN;
- @Deprecated
- protected double adamVarDecay = Double.NaN;
+ protected IUpdater biasUpdater = null;
protected Layer layer;
- @Deprecated
- protected double leakyreluAlpha = 0.01;
protected boolean miniBatch = true;
protected int numIterations = 1;
protected int maxNumLineSearchIterations = 5;
protected long seed = System.currentTimeMillis();
protected OptimizationAlgorithm optimizationAlgo = OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT;
protected StepFunction stepFunction = null;
- protected boolean useDropConnect = false;
protected boolean minimize = true;
protected GradientNormalization gradientNormalization = GradientNormalization.None;
protected double gradientNormalizationThreshold = 1.0;
- protected LearningRatePolicy learningRatePolicy = LearningRatePolicy.None;
- protected double lrPolicyDecayRate = Double.NaN;
- protected double lrPolicySteps = Double.NaN;
- protected double lrPolicyPower = Double.NaN;
protected boolean pretrain = false;
protected List allParamConstraints;
protected List weightConstraints;
@@ -659,12 +614,7 @@ public Builder(NeuralNetConfiguration newConf) {
optimizationAlgo = newConf.optimizationAlgo;
seed = newConf.seed;
stepFunction = newConf.stepFunction;
- useDropConnect = newConf.useDropConnect;
miniBatch = newConf.miniBatch;
- learningRatePolicy = newConf.learningRatePolicy;
- lrPolicyDecayRate = newConf.lrPolicyDecayRate;
- lrPolicySteps = newConf.lrPolicySteps;
- lrPolicyPower = newConf.lrPolicyPower;
pretrain = newConf.pretrain;
}
}
@@ -720,18 +670,6 @@ public Builder cacheMode(@NonNull CacheMode cacheMode) {
return this;
}
- /**
- * Use drop connect: multiply the weight by a binomial sampling wrt the dropout probability.
- * Dropconnect probability is set using {@link #dropOut(double)}; this is the probability of retaining a weight
- *
- * @param useDropConnect whether to use drop connect or not
- * @return the
- */
- public Builder useDropConnect(boolean useDropConnect) {
- this.useDropConnect = useDropConnect;
- return this;
- }
-
/**
* Objective function to minimize or maximize cost function
* Default set to minimize true.
@@ -769,6 +707,7 @@ public Builder layer(Layer layer) {
* Options: DefaultStepFunction (default), NegativeDefaultStepFunction
* GradientStepFunction (for SGD), NegativeGradientStepFunction
*/
+ @Deprecated
public Builder stepFunction(StepFunction stepFunction) {
this.stepFunction = stepFunction;
return this;
@@ -779,9 +718,9 @@ public Builder stepFunction(StepFunction stepFunction) {
* Usage:
*
* {@code .list()
- * .layer(0,new DenseLayer.Builder()...build())
+ * .layer(new DenseLayer.Builder()...build())
* ...
- * .layer(n,new OutputLayer.Builder()...build())
+ * .layer(new OutputLayer.Builder()...build())
* }
*
*/
@@ -823,22 +762,14 @@ public ComputationGraphConfiguration.GraphBuilder graphBuilder() {
}
/**
- * Number of optimization iterations.
+ * Number of optimization iterations. Should be set to 1 for >99% of use cases (possible exception:
+ * very tiny full batch dataset training)
*/
public Builder iterations(int numIterations) {
this.numIterations = numIterations;
return this;
}
- /**
- * Random number generator seed. Used for reproducability between runs
- */
- public Builder seed(int seed) {
- this.seed = (long) seed;
- Nd4j.getRandom().setSeed(seed);
- return this;
- }
-
/**
* Random number generator seed. Used for reproducability between runs
*/
@@ -858,14 +789,6 @@ public Builder optimizationAlgo(OptimizationAlgorithm optimizationAlgo) {
return this;
}
- /**
- * @deprecated Now: no-op. Regularization is always used when l1/l2/dropout is > 0
- */
- @Deprecated
- public Builder regularization(boolean useRegularization) {
- return this;
- }
-
@Override
public Builder clone() {
try {
@@ -882,20 +805,6 @@ public Builder clone() {
}
}
- /**
- * Activation function / neuron non-linearity
- * Typical values include:
- * "relu" (rectified linear), "tanh", "sigmoid", "softmax",
- * "hardtanh", "leakyrelu", "maxout", "softsign", "softplus"
- *
- * @deprecated Use {@link #activation(Activation)} or
- * {@link @activation(IActivation)}
- */
- @Deprecated
- public Builder activation(String activationFunction) {
- return activation(Activation.fromString(activationFunction).getActivationFunction());
- }
-
/**
* Activation function / neuron non-linearity
*
@@ -913,15 +822,6 @@ public Builder activation(Activation activation) {
return activation(activation.getActivationFunction());
}
- /**
- * @deprecated Use {@link #activation(IActivation)} with leaky relu, setting alpha value directly in constructor.
- */
- @Deprecated
- public Builder leakyreluAlpha(double leakyreluAlpha) {
- this.leakyreluAlpha = leakyreluAlpha;
- return this;
- }
-
/**
* Weight initialization scheme.
*
@@ -951,39 +851,6 @@ public Builder dist(Distribution dist) {
return this;
}
- /**
- * Learning rate. Defaults to 1e-1
- */
- public Builder learningRate(double learningRate) {
- this.learningRate = learningRate;
- return this;
- }
-
- /**
- * Bias learning rate. Set this to apply a different learning rate to the bias
- */
- public Builder biasLearningRate(double biasLearningRate) {
- this.biasLearningRate = biasLearningRate;
- return this;
- }
-
- /**
- * Learning rate schedule. Map of the iteration to the learning rate to apply at that iteration.
- */
- public Builder learningRateSchedule(Map learningRateSchedule) {
- this.learningRateSchedule = learningRateSchedule;
- return this;
- }
-
- /**
- * Rate to decrease learningRate by when the score stops improving.
- * Learning rate is multiplied by this rate so ideally keep between 0 and 1.
- */
- public Builder learningRateScoreBasedDecayRate(double lrScoreBasedDecay) {
- this.lrScoreBasedDecay = lrScoreBasedDecay;
- return this;
- }
-
/**
* L1 regularization coefficient for the weights.
*/
@@ -1022,8 +889,6 @@ public Builder l2Bias(double l2Bias) {
* dropOut(0.0) is a special value / special case - when set to 0.0., dropout is disabled (not applied). Note
* that a dropout value of 1.0 is functionally equivalent to no dropout: i.e., 100% probability of retaining
* each input activation.
- * When {@link #useDropConnect(boolean)} is set to true (false by default), this method sets the drop connect
- * probability instead.
*
* Note 1: Dropout is applied at training time only - and is automatically not applied at test time
* (for evaluation, etc)
@@ -1037,139 +902,69 @@ public Builder l2Bias(double l2Bias) {
*
*
* @param inputRetainProbability Dropout probability (probability of retaining each input activation value for a layer)
+ * @see #dropOut(IDropout)
*/
public Builder dropOut(double inputRetainProbability) {
- this.dropOut = inputRetainProbability;
- return this;
+ return dropOut(new Dropout(inputRetainProbability));
}
/**
- * Momentum rate
- * Used only when Updater is set to {@link Updater#NESTEROVS}
+ * Set the dropout for all layers in this network
*
- * @deprecated Use {@code .updater(new Nesterov(momentum))} instead
+ * @param dropout Dropout, such as {@link Dropout}, {@link org.deeplearning4j.nn.conf.dropout.GaussianDropout},
+ * {@link org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc
+ * @return This builder, for method chaining
*/
- @Deprecated
- public Builder momentum(double momentum) {
- this.momentum = momentum;
+ public Builder dropOut(IDropout dropout){
+ this.idropOut = dropout;
return this;
}
/**
- * Momentum schedule. Map of the iteration to the momentum rate to apply at that iteration
- * Used only when Updater is set to {@link Updater#NESTEROVS}
+ * Set the weight noise (such as {@link org.deeplearning4j.nn.conf.weightnoise.DropConnect} and
+ * {@link org.deeplearning4j.nn.conf.weightnoise.WeightNoise}) for the layers in this network.
*
- * @deprecated Use {@code .updater(Nesterov.builder().momentumSchedule(schedule).build())} instead
+ * @param weightNoise Weight noise instance to use
*/
- @Deprecated
- public Builder momentumAfter(Map momentumAfter) {
- this.momentumSchedule = momentumAfter;
+ public Builder weightNoise(IWeightNoise weightNoise){
+ this.weightNoise = weightNoise;
return this;
}
+
/**
- * Gradient updater. For example, Updater.SGD for standard stochastic gradient descent,
- * Updater.NESTEROV for Nesterov momentum, Updater.RSMPROP for RMSProp, etc.
- * Note: default hyperparameters are used with this method. Use {@link #updater(IUpdater)} to configure
- * the updater-specific hyperparameters.
- *
- * @see Updater
+ * @deprecated Use {@link #updater(IUpdater)}
*/
+ @Deprecated
public Builder updater(Updater updater) {
- this.updater = updater;
return updater(updater.getIUpdaterWithDefaultConfig());
}
/**
- * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam}
+ * Gradient updater configuration. For example, {@link org.nd4j.linalg.learning.config.Adam}
* or {@link org.nd4j.linalg.learning.config.Nesterovs}
*
* @param updater Updater to use
*/
public Builder updater(IUpdater updater) {
- //Ensure legacy field is set...
- if (updater instanceof Sgd)
- this.updater = Updater.SGD;
- else if (updater instanceof Adam)
- this.updater = Updater.ADAM;
- else if (updater instanceof AdaMax)
- this.updater = Updater.ADAMAX;
- else if (updater instanceof AdaDelta)
- this.updater = Updater.ADADELTA;
- else if (updater instanceof Nesterovs)
- this.updater = Updater.NESTEROVS;
- else if (updater instanceof Nadam)
- this.updater = Updater.NADAM;
- else if (updater instanceof AdaGrad)
- this.updater = Updater.ADAGRAD;
- else if (updater instanceof RmsProp)
- this.updater = Updater.RMSPROP;
- else if (updater instanceof NoOp)
- this.updater = Updater.NONE;
this.iUpdater = updater;
return this;
}
/**
- * Ada delta coefficient
- *
- * @param rho
- * @deprecated use {@code .updater(new AdaDelta(rho,epsilon))} intead
- */
- @Deprecated
- public Builder rho(double rho) {
- this.rho = rho;
- return this;
- }
-
-
- /**
- * Epsilon value for updaters: Adam, RMSProp, Adagrad, Adadelta
- *
- * @param epsilon Epsilon value to use for adagrad or
- * @deprecated Use use {@code .updater(Adam.builder().epsilon(epsilon).build())} or similar instead
- */
- @Deprecated
- public Builder epsilon(double epsilon) {
- this.epsilon = epsilon;
- return this;
- }
-
- /**
- * Decay rate for RMSProp. Only applies if using .updater(Updater.RMSPROP)
- *
- * @deprecated use {@code .updater(new RmsProp(rmsDecay))} intead
- */
- @Deprecated
- public Builder rmsDecay(double rmsDecay) {
- this.rmsDecay = rmsDecay;
- return this;
- }
-
- /**
- * Mean decay rate for Adam updater. Only applies if using .updater(Updater.ADAM)
- *
- * @deprecated use {@code .updater(Adam.builder().beta1(adamMeanDecay).build())} intead
- */
- @Deprecated
- public Builder adamMeanDecay(double adamMeanDecay) {
- this.adamMeanDecay = adamMeanDecay;
- return this;
- }
-
- /**
- * Variance decay rate for Adam updater. Only applies if using .updater(Updater.ADAM)
+ * Gradient updater configuration, for the biases only. If not set, biases will use the updater as
+ * set by {@link #updater(IUpdater)}
*
- * @deprecated use {@code .updater(Adam.builder().beta2(adamVarDecay).build())} intead
+ * @param updater Updater to use for bias parameters
*/
- @Deprecated
- public Builder adamVarDecay(double adamVarDecay) {
- this.adamVarDecay = adamVarDecay;
+ public Builder biasUpdater(IUpdater updater){
+ this.biasUpdater = updater;
return this;
}
/**
* Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping etc.
+ * See {@link GradientNormalization} for details
*
* @param gradientNormalization Type of normalization to use. Defaults to None.
* @see GradientNormalization
@@ -1190,46 +985,6 @@ public Builder gradientNormalizationThreshold(double threshold) {
return this;
}
- /**
- * Learning rate decay policy. Used to adapt learning rate based on policy.
- *
- * @param policy Type of policy to use. Defaults to None.
- */
- public Builder learningRateDecayPolicy(LearningRatePolicy policy) {
- this.learningRatePolicy = policy;
- return this;
- }
-
- /**
- * Set the decay rate for the learning rate decay policy.
- *
- * @param lrPolicyDecayRate rate.
- */
- public Builder lrPolicyDecayRate(double lrPolicyDecayRate) {
- this.lrPolicyDecayRate = lrPolicyDecayRate;
- return this;
- }
-
- /**
- * Set the number of steps used for learning decay rate steps policy.
- *
- * @param lrPolicySteps number of steps
- */
- public Builder lrPolicySteps(double lrPolicySteps) {
- this.lrPolicySteps = lrPolicySteps;
- return this;
- }
-
- /**
- * Set the power used for learning rate inverse policy.
- *
- * @param lrPolicyPower power
- */
- public Builder lrPolicyPower(double lrPolicyPower) {
- this.lrPolicyPower = lrPolicyPower;
- return this;
- }
-
/**
* Sets the convolution mode for convolutional layers, which impacts padding and output sizes.
* See {@link ConvolutionMode} for details. Defaults to ConvolutionMode.TRUNCATE
@@ -1276,53 +1031,6 @@ public Builder constrainWeights(LayerConstraint... constraints) {
return this;
}
- private void learningRateValidation(String layerName) {
- if (learningRatePolicy != LearningRatePolicy.None && Double.isNaN(lrPolicyDecayRate)) {
- //LR policy, if used, should have a decay rate. 2 exceptions: Map for schedule, and Poly + power param
- if (!(learningRatePolicy == LearningRatePolicy.Schedule && learningRateSchedule != null)
- && !(learningRatePolicy == LearningRatePolicy.Poly && !Double.isNaN(lrPolicyPower)))
- throw new IllegalStateException("Layer \"" + layerName
- + "\" learning rate policy decay rate (lrPolicyDecayRate) must be set to use learningRatePolicy.");
- }
- switch (learningRatePolicy) {
- case Inverse:
- case Poly:
- if (Double.isNaN(lrPolicyPower))
- throw new IllegalStateException("Layer \"" + layerName
- + "\" learning rate policy power (lrPolicyPower) must be set to use "
- + learningRatePolicy);
- break;
- case Step:
- case Sigmoid:
- if (Double.isNaN(lrPolicySteps))
- throw new IllegalStateException("Layer \"" + layerName
- + "\" learning rate policy steps (lrPolicySteps) must be set to use "
- + learningRatePolicy);
- break;
- case Schedule:
- if (learningRateSchedule == null)
- throw new IllegalStateException("Layer \"" + layerName
- + "\" learning rate policy schedule (learningRateSchedule) must be set to use "
- + learningRatePolicy);
- break;
- }
-
- if (!Double.isNaN(lrPolicyPower) && (learningRatePolicy != LearningRatePolicy.Inverse
- && learningRatePolicy != LearningRatePolicy.Poly))
- throw new IllegalStateException("Layer \"" + layerName
- + "\" power has been set but will not be applied unless the learning rate policy is set to Inverse or Poly.");
- if (!Double.isNaN(lrPolicySteps) && (learningRatePolicy != LearningRatePolicy.Step
- && learningRatePolicy != LearningRatePolicy.Sigmoid
- && learningRatePolicy != LearningRatePolicy.TorchStep))
- throw new IllegalStateException("Layer \"" + layerName
- + "\" steps have been set but will not be applied unless the learning rate policy is set to Step or Sigmoid.");
- if ((learningRateSchedule != null) && (learningRatePolicy != LearningRatePolicy.Schedule))
- throw new IllegalStateException("Layer \"" + layerName
- + "\" learning rate schedule has been set but will not be applied unless the learning rate policy is set to Schedule.");
-
- }
- ////////////////
-
/**
* Return a configuration based on this builder
*
@@ -1338,12 +1046,7 @@ public NeuralNetConfiguration build() {
conf.optimizationAlgo = optimizationAlgo;
conf.seed = seed;
conf.stepFunction = stepFunction;
- conf.useDropConnect = useDropConnect;
conf.miniBatch = miniBatch;
- conf.learningRatePolicy = learningRatePolicy;
- conf.lrPolicyDecayRate = lrPolicyDecayRate;
- conf.lrPolicySteps = lrPolicySteps;
- conf.lrPolicyPower = lrPolicyPower;
conf.pretrain = pretrain;
conf.cacheMode = this.cacheMode;
@@ -1361,7 +1064,6 @@ private void configureLayer(Layer layer) {
layerName = "Layer not named";
else
layerName = layer.getLayerName();
- learningRateValidation(layerName);
if (layer != null) {
copyConfigToLayer(layerName, layer);
@@ -1383,32 +1085,17 @@ private void configureLayer(Layer layer) {
sl.setConvolutionMode(convolutionMode);
}
}
- LayerValidation.generalValidation(layerName, layer, useDropConnect, dropOut, l2, l2Bias,
- l1, l1Bias, dist, allParamConstraints, weightConstraints, biasConstraints);
+ LayerValidation.generalValidation(layerName, layer, idropOut, l2, l2Bias, l1, l1Bias, dist,
+ allParamConstraints, weightConstraints, biasConstraints);
}
private void copyConfigToLayer(String layerName, Layer layer) {
- if (Double.isNaN(layer.getDropOut()))
- layer.setDropOut(dropOut);
+ if (layer.getIDropout() == null)
+ layer.setIDropout(idropOut);
if (layer instanceof BaseLayer) {
BaseLayer bLayer = (BaseLayer) layer;
- if (Double.isNaN(bLayer.getLearningRate()))
- bLayer.setLearningRate(learningRate);
- if (Double.isNaN(bLayer.getBiasLearningRate())) {
- //Two possibilities when bias LR isn't set for layer:
- // (a) If global bias LR *is* set -> set it to that
- // (b) Otherwise, set to layer LR (and, by extension, the global LR)
- if (!Double.isNaN(biasLearningRate)) {
- //Global bias LR is set
- bLayer.setBiasLearningRate(biasLearningRate);
- } else {
- bLayer.setBiasLearningRate(bLayer.getLearningRate());
- }
- }
- if (bLayer.getLearningRateSchedule() == null)
- bLayer.setLearningRateSchedule(learningRateSchedule);
if (Double.isNaN(bLayer.getL1()))
bLayer.setL1(l1);
if (Double.isNaN(bLayer.getL2()))
@@ -1419,13 +1106,27 @@ private void copyConfigToLayer(String layerName, Layer layer) {
bLayer.setWeightInit(weightInit);
if (Double.isNaN(bLayer.getBiasInit()))
bLayer.setBiasInit(biasInit);
- if (bLayer.getUpdater() == null)
- bLayer.setUpdater(updater);
- if (bLayer.getIUpdater() == null) {
- bLayer.setIUpdater(iUpdater.clone());
+
+ //Configure weight noise:
+ if(weightNoise != null && ((BaseLayer) layer).getWeightNoise() == null){
+ ((BaseLayer) layer).setWeightNoise(weightNoise.clone());
+ }
+
+ //Configure updaters:
+ if(iUpdater != null && bLayer.getIUpdater() == null){
+ bLayer.setIUpdater(iUpdater);
}
- LayerValidation.updaterValidation(layerName, layer, learningRate, momentum, momentumSchedule,
- adamMeanDecay, adamVarDecay, rho, rmsDecay, epsilon);
+ if(biasUpdater != null && bLayer.getBiasUpdater() == null){
+ bLayer.setBiasUpdater(biasUpdater);
+ }
+
+ if(bLayer.getIUpdater() == null && iUpdater == null && bLayer.initializer().numParams(bLayer) > 0){
+ //No updater set anywhere
+ IUpdater u = new Sgd();
+ bLayer.setIUpdater(u);
+ log.warn("*** No updater configuration is set for layer {} - defaulting to {} ***", layerName, u);
+ }
+
if (bLayer.getGradientNormalization() == null)
bLayer.setGradientNormalization(gradientNormalization);
if (Double.isNaN(bLayer.getGradientNormalizationThreshold()))
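To summarize the API change in this file, a hedged before/after sketch of a typical configuration (the "before" methods are exactly those removed or deprecated above; the constructor arguments shown for Nesterovs, Sgd and DropConnect are illustrative assumptions, not taken from this diff):

    //Before (removed/deprecated): .learningRate(0.01).momentum(0.9).updater(Updater.NESTEROVS)
    //                             .dropOut(0.5).useDropConnect(true).regularization(true)
    //After: hyperparameters live on the IUpdater / IDropout / IWeightNoise objects:
    NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
            .updater(new Nesterovs(0.01, 0.9))      //Learning rate and momentum set on the updater
            .biasUpdater(new Sgd(0.02))             //Optional: separate updater for bias parameters
            .dropOut(new Dropout(0.5))              //Retain probability 0.5
            .weightNoise(new DropConnect(0.5));     //DropConnect replaces useDropConnect(true)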
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java
index bdd43659ae0b..83f74bf2b2f0 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java
@@ -8,8 +8,6 @@
import org.nd4j.linalg.indexing.conditions.Conditions;
import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
import java.util.Set;
/**
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java
index 8853a49e5fd0..db0753e146ed 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java
@@ -10,9 +10,7 @@
import org.nd4j.linalg.indexing.BooleanIndexing;
import org.nd4j.linalg.indexing.conditions.Conditions;
-import java.util.Arrays;
import java.util.Collections;
-import java.util.HashSet;
import java.util.Set;
/**
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java
index 43cde563557c..170b13c6eefd 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java
@@ -6,7 +6,6 @@
import org.nd4j.linalg.factory.Broadcast;
import java.util.Collections;
-import java.util.HashSet;
import java.util.Set;
/**
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/Distributions.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/Distributions.java
index 88cdb172f182..f4863c8bb00f 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/Distributions.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/Distributions.java
@@ -46,6 +46,18 @@ public static org.nd4j.linalg.api.rng.distribution.Distribution createDistributi
BinomialDistribution bd = (BinomialDistribution) dist;
return Nd4j.getDistributions().createBinomial(bd.getNumberOfTrials(), bd.getProbabilityOfSuccess());
}
+ if (dist instanceof LogNormalDistribution) {
+ LogNormalDistribution lnd = (LogNormalDistribution) dist;
+ return Nd4j.getDistributions().createLogNormal(lnd.getMean(), lnd.getStd());
+ }
+ if (dist instanceof TruncatedNormalDistribution) {
+ TruncatedNormalDistribution tnd = (TruncatedNormalDistribution) dist;
+ return Nd4j.getDistributions().createTruncatedNormal(tnd.getMean(), tnd.getStd());
+ }
+ if (dist instanceof OrthogonalDistribution) {
+ OrthogonalDistribution od = (OrthogonalDistribution) dist;
+ return Nd4j.getDistributions().createOrthogonal(od.getGain());
+ }
throw new RuntimeException("unknown distribution type: " + dist.getClass());
}
}
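The three new branches above dispatch to the conf-level distribution classes added below. A hedged sketch of using them for weight initialization (parameter values are illustrative; assumes WeightInit.DISTRIBUTION routes through Distributions.createDistribution as shown):

    //Weights drawn from a log-normal distribution via the new conf classes:
    NeuralNetConfiguration.Builder b = new NeuralNetConfiguration.Builder()
            .weightInit(WeightInit.DISTRIBUTION)
            .dist(new LogNormalDistribution(0.0, 1.0));

    //The other distributions added in this diff:
    Distribution truncated = new TruncatedNormalDistribution(0.0, 1.0);
    Distribution orthogonal = new OrthogonalDistribution(1.0);  //gain = 1.0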
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/LogNormalDistribution.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/LogNormalDistribution.java
new file mode 100644
index 000000000000..537ef65dbded
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/LogNormalDistribution.java
@@ -0,0 +1,92 @@
+/*-
+ *
+ * * Copyright 2015 Skymind,Inc.
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+
+package org.deeplearning4j.nn.conf.distribution;
+
+import org.nd4j.shade.jackson.annotation.JsonCreator;
+import org.nd4j.shade.jackson.annotation.JsonProperty;
+
+/**
+ * A log-normal distribution.
+ *
+ */
+public class LogNormalDistribution extends Distribution {
+
+ private double mean, std;
+
+ /**
+ * Create a log-normal distribution
+ * with the given mean and std
+ *
+ * @param mean the mean
+ * @param std the standard deviation
+ */
+ @JsonCreator
+ public LogNormalDistribution(@JsonProperty("mean") double mean, @JsonProperty("std") double std) {
+ this.mean = mean;
+ this.std = std;
+ }
+
+ public double getMean() {
+ return mean;
+ }
+
+ public void setMean(double mean) {
+ this.mean = mean;
+ }
+
+ public double getStd() {
+ return std;
+ }
+
+ public void setStd(double std) {
+ this.std = std;
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ long temp;
+ temp = Double.doubleToLongBits(mean);
+ result = prime * result + (int) (temp ^ (temp >>> 32));
+ temp = Double.doubleToLongBits(std);
+ result = prime * result + (int) (temp ^ (temp >>> 32));
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ LogNormalDistribution other = (LogNormalDistribution) obj;
+ if (Double.doubleToLongBits(mean) != Double.doubleToLongBits(other.mean))
+ return false;
+ if (Double.doubleToLongBits(std) != Double.doubleToLongBits(other.std))
+ return false;
+ return true;
+ }
+
+ public String toString() {
+ return "LogNormalDistribution{" + "mean=" + mean + ", std=" + std + '}';
+ }
+}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java
new file mode 100644
index 000000000000..a9487c000f70
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java
@@ -0,0 +1,78 @@
+/*-
+ *
+ * * Copyright 2015 Skymind,Inc.
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+
+package org.deeplearning4j.nn.conf.distribution;
+
+import org.nd4j.shade.jackson.annotation.JsonCreator;
+import org.nd4j.shade.jackson.annotation.JsonProperty;
+
+/**
+ * Orthogonal distribution.
+ *
+ */
+public class OrthogonalDistribution extends Distribution {
+
+ private double gain;
+
+ /**
+ * Create an orthogonal distribution
+ * with the given gain
+ *
+ * @param gain the gain
+ */
+ @JsonCreator
+ public OrthogonalDistribution(@JsonProperty("gain") double gain) {
+ this.gain = gain;
+ }
+
+ public double getGain() {
+ return gain;
+ }
+
+ public void setGain(double gain) {
+ this.gain = gain;
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ long temp;
+ temp = Double.doubleToLongBits(gain);
+ result = prime * result + (int) (temp ^ (temp >>> 32));
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ OrthogonalDistribution other = (OrthogonalDistribution) obj;
+ if (Double.doubleToLongBits(gain) != Double.doubleToLongBits(other.gain))
+ return false;
+ return true;
+ }
+
+ public String toString() {
+ return "OrthogonalDistribution{gain=" + gain + "}";
+ }
+}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/TruncatedNormalDistribution.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/TruncatedNormalDistribution.java
new file mode 100644
index 000000000000..0e3ea1a5d918
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/TruncatedNormalDistribution.java
@@ -0,0 +1,92 @@
+/*-
+ *
+ * * Copyright 2015 Skymind,Inc.
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+
+package org.deeplearning4j.nn.conf.distribution;
+
+import org.nd4j.shade.jackson.annotation.JsonCreator;
+import org.nd4j.shade.jackson.annotation.JsonProperty;
+
+/**
+ * A truncated normal distribution.
+ *
+ */
+public class TruncatedNormalDistribution extends Distribution {
+
+ private double mean, std;
+
+ /**
+ * Create a truncated normal distribution
+ * with the given mean and std
+ *
+ * @param mean the mean
+ * @param std the standard deviation
+ */
+ @JsonCreator
+ public TruncatedNormalDistribution(@JsonProperty("mean") double mean, @JsonProperty("std") double std) {
+ this.mean = mean;
+ this.std = std;
+ }
+
+ public double getMean() {
+ return mean;
+ }
+
+ public void setMean(double mean) {
+ this.mean = mean;
+ }
+
+ public double getStd() {
+ return std;
+ }
+
+ public void setStd(double std) {
+ this.std = std;
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ long temp;
+ temp = Double.doubleToLongBits(mean);
+ result = prime * result + (int) (temp ^ (temp >>> 32));
+ temp = Double.doubleToLongBits(std);
+ result = prime * result + (int) (temp ^ (temp >>> 32));
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ TruncatedNormalDistribution other = (TruncatedNormalDistribution) obj;
+ if (Double.doubleToLongBits(mean) != Double.doubleToLongBits(other.mean))
+ return false;
+ if (Double.doubleToLongBits(std) != Double.doubleToLongBits(other.std))
+ return false;
+ return true;
+ }
+
+ public String toString() {
+ return "TruncatedNormalDistribution{" + "mean=" + mean + ", std=" + std + '}';
+ }
+}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/AlphaDropout.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/AlphaDropout.java
new file mode 100644
index 000000000000..08b2b1440933
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/AlphaDropout.java
@@ -0,0 +1,123 @@
+package org.deeplearning4j.nn.conf.dropout;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.NonNull;
+import lombok.ToString;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.ops.random.impl.AlphaDropOut;
+import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.schedule.ISchedule;
+import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties;
+import org.nd4j.shade.jackson.annotation.JsonProperty;
+
+/**
+ * AlphaDropout is a dropout technique proposed by Klambauer et al. 2017 - Self-Normalizing Neural Networks
+ * https://arxiv.org/abs/1706.02515
+ *
+ * This dropout technique was designed specifically for self-normalizing neural networks - i.e., networks using
+ * {@link org.nd4j.linalg.activations.impl.ActivationSELU} / {@link org.nd4j.linalg.activations.Activation#SELU}
+ * activation function, combined with the N(0,stdev=1/sqrt(fanIn)) "SNN" weight initialization,
+ * {@link org.deeplearning4j.nn.weights.WeightInit#NORMAL}
+ *
+ * In conjunction with the aforementioned activation function and weight initialization, AlphaDropout attempts to keep
+ * both the mean and variance of the post-dropout activations the same (in expectation) as before alpha
+ * dropout was applied.
+ * Specifically, AlphaDropout implements a * (x * d + alphaPrime * (1-d)) + b, where d ~ Bernoulli(p), i.e., d \in {0,1},
+ * x is the input activations, and a, b, alphaPrime are constants determined from the SELU alpha/lambda parameters.
+ * Users should use the default alpha/lambda values in virtually all cases.
+ *
+ * Dropout schedules (i.e., varying probability p as a function of iteration/epoch) are also supported.
+ *
+ * @author Alex Black
+ */
+@Data
+@EqualsAndHashCode(exclude = {"lastPValue","alphaPrime","a","b"})
+@ToString(exclude = {"lastPValue","alphaPrime","a","b"})
+@JsonIgnoreProperties({"lastPValue", "alphaPrime", "a", "b"})
+public class AlphaDropout implements IDropout {
+
+ public static final double DEFAULT_ALPHA = 1.6732632423543772;
+ public static final double DEFAULT_LAMBDA = 1.0507009873554804;
+
+
+ private final double p;
+ private final ISchedule pSchedule;
+ private final double alpha;
+ private final double lambda;
+
+ private double lastPValue;
+ private double alphaPrime;
+ private double a;
+ private double b;
+
+ /**
+ * @param activationRetainProbability Probability of retaining an activation. See {@link AlphaDropout} javadoc
+ */
+ public AlphaDropout(double activationRetainProbability){
+ this(activationRetainProbability, null, DEFAULT_ALPHA, DEFAULT_LAMBDA);
+ }
+
+ /**
+ * @param activationRetainProbabilitySchedule Schedule for the probability of retaining an activation. See
+ * {@link AlphaDropout} javadoc
+ */
+ public AlphaDropout(@NonNull ISchedule activationRetainProbabilitySchedule){
+ this(Double.NaN, activationRetainProbabilitySchedule, DEFAULT_ALPHA, DEFAULT_LAMBDA);
+ }
+
+ protected AlphaDropout(@JsonProperty("p")double activationRetainProbability,
+ @JsonProperty("pSchedule") ISchedule activationRetainProbabilitySchedule,
+ @JsonProperty("alpha") double alpha, @JsonProperty("lambda") double lambda ){
+ this.p = activationRetainProbability;
+ this.pSchedule = activationRetainProbabilitySchedule;
+ this.alpha = alpha;
+ this.lambda = lambda;
+
+ this.alphaPrime = -lambda * alpha;
+ if(activationRetainProbabilitySchedule == null){
+ this.lastPValue = p;
+ this.a = a(p);
+ this.b = b(p);
+ }
+ }
+
+ @Override
+ public INDArray applyDropout(INDArray inputActivations, int iteration, int epoch, boolean inPlace) {
+ //https://arxiv.org/pdf/1706.02515.pdf pg6
+ // "...we propose “alpha dropout”, that randomly sets inputs to α'"
+ // "The affine transformation a(xd + α'(1−d))+b allows to determine parameters a and b such that mean and
+ // variance are kept to their values"
+
+ double pValue;
+ if(pSchedule != null){
+ pValue = pSchedule.valueAt(iteration, epoch);
+ } else {
+ pValue = p;
+ }
+
+ if(pValue != lastPValue){
+ a = a(pValue);
+ b = b(pValue);
+ }
+ lastPValue = pValue;
+
+ INDArray result = inPlace ? inputActivations : inputActivations.dup(inputActivations.ordering());
+ Nd4j.getExecutioner().exec(new AlphaDropOut(result, pValue, a, alphaPrime, b)); //Use the (possibly schedule-derived) pValue, not the raw p field, which is NaN when a schedule is set
+
+ return result;
+ }
+
+ @Override
+ public AlphaDropout clone() {
+ return new AlphaDropout(p, pSchedule == null ? null : pSchedule.clone(), alpha, lambda);
+ }
+
+ public double a(double p){
+ return 1.0 / Math.sqrt(p + alphaPrime*alphaPrime * p * (1-p));
+ }
+
+ public double b(double p){
+ return -a(p) * (1-p)*alphaPrime;
+ }
+}
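A hedged configuration sketch for the self-normalizing setup this class targets (the builder methods are those added earlier in this diff; the retain probability value is illustrative):

    //SELU activation + NORMAL (1/sqrt(fanIn)) init + AlphaDropout, per the javadoc above
    NeuralNetConfiguration.Builder snn = new NeuralNetConfiguration.Builder()
            .activation(Activation.SELU)
            .weightInit(WeightInit.NORMAL)
            .dropOut(new AlphaDropout(0.95));   //Retain probability p = 0.95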
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java
new file mode 100644
index 000000000000..e09ebec9ca29
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java
@@ -0,0 +1,86 @@
+package org.deeplearning4j.nn.conf.dropout;
+
+import lombok.Data;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.ops.random.impl.DropOutInverted;
+import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.schedule.ISchedule;
+import org.nd4j.shade.jackson.annotation.JsonProperty;
+
+/**
+ * Implements standard (inverted) dropout.
+ *
+ * Regarding the dropout probability: this is the probability of retaining each input activation value for a layer.
+ * Thus, each input activation x is independently set to:
+ * x <- 0, with probability 1-p
+ * x <- x/p with probability p
+ * Note that this "inverted" dropout scheme maintains the expected value of activations - i.e., E(x) is the same before
+ * and after dropout.
+ * Dropout schedules (i.e., varying probability p as a function of iteration/epoch) are also supported.
+ *
+ * Other libraries (notably, Keras) use p == probability(dropping an activation).
+ * In DL4J, {@code new Dropout(x)} will keep an input activation with probability x, and set it to 0 with probability 1-x.
+ * Thus, a dropout value of 1.0 is functionally equivalent to no dropout: i.e., 100% probability of retaining
+ * each input activation.
+ *
+ * Note 1: As per all IDropout instances, dropout is applied at training time only - and is automatically not applied at
+ * test time (for evaluation, etc)
+ * Note 2: Care should be taken when setting low (probability of retaining) values, as too much information may be
+ * lost with aggressive (very low) dropout values.
+ * Note 3: Frequently, dropout is not applied to (or, has a higher retain probability for) the input (first) layer.
+ * Dropout is also often not applied to output layers.
+ * Note 4: Implementation detail (most users can ignore): DL4J uses inverted dropout, as described here:
+ * http://cs231n.github.io/neural-networks-2/
+ *
+ *
+ * See: Srivastava et al. 2014: Dropout: A Simple Way to Prevent Neural Networks from Overfitting
+ * http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf
+ *
+ * @author Alex Black
+ */
+@Data
+public class Dropout implements IDropout {
+
+ private double p;
+ private ISchedule pSchedule;
+
+ /**
+ * @param activationRetainProbability Probability of retaining an activation - see {@link Dropout} javadoc
+ */
+ public Dropout(double activationRetainProbability) {
+ this(activationRetainProbability, null);
+ }
+
+ /**
+ * @param activationRetainProbabilitySchedule Schedule for probability of retaining an activation - see {@link Dropout} javadoc
+ */
+ public Dropout(ISchedule activationRetainProbabilitySchedule){
+ this(Double.NaN, activationRetainProbabilitySchedule);
+ }
+
+ protected Dropout(@JsonProperty("p") double activationRetainProbability, @JsonProperty("pSchedule") ISchedule activationRetainProbabilitySchedule) {
+ this.p = activationRetainProbability;
+ this.pSchedule = activationRetainProbabilitySchedule;
+ }
+
+
+ @Override
+ public INDArray applyDropout(INDArray inputActivations, int iteration, int epoch, boolean inPlace) {
+ double currP;
+ if(pSchedule != null){
+ currP = pSchedule.valueAt(iteration, epoch);
+ } else {
+ currP = p;
+ }
+
+ INDArray result = inPlace ? inputActivations : inputActivations.dup(inputActivations.ordering());
+ Nd4j.getExecutioner().exec(new DropOutInverted(result, currP));
+
+ return result;
+ }
+
+ @Override
+ public Dropout clone() {
+ return new Dropout(p, pSchedule == null ? null : pSchedule.clone());
+ }
+}
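Because the constructor accepts an ISchedule, the retain probability can vary over training. A hedged sketch, assuming the org.nd4j.linalg.schedule.MapSchedule implementation and its ScheduleType enum (plus java.util.Map/HashMap imports):

    //No dropout for the first 10 epochs, then retain probability 0.5
    Map<Integer, Double> p = new HashMap<>();
    p.put(0, 1.0);
    p.put(10, 0.5);
    IDropout scheduled = new Dropout(new MapSchedule(ScheduleType.EPOCH, p));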
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianDropout.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianDropout.java
new file mode 100644
index 000000000000..d58fee29a099
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianDropout.java
@@ -0,0 +1,79 @@
+package org.deeplearning4j.nn.conf.dropout;
+
+import lombok.Data;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.ops.impl.transforms.arithmetic.MulOp;
+import org.nd4j.linalg.api.ops.random.impl.GaussianDistribution;
+import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.schedule.ISchedule;
+import org.nd4j.shade.jackson.annotation.JsonProperty;
+
+/**
+ * Gaussian dropout. This is a multiplicative Gaussian noise (mean 1) on the input activations.
+ *
+ * Each input activation x is independently set to:
+ * x <- x * y, where y ~ N(1, stdev = sqrt(rate/(1-rate)))
+ * Dropout schedules (i.e., varying probability p as a function of iteration/epoch) are also supported.
+ *
+ * Note 1: As per all IDropout instances, GaussianDropout is applied at training time only - and is automatically not
+ * applied at test time (for evaluation, etc)
+ * Note 2: Frequently, dropout is not applied to (or, has higher retain probability for) input (first layer)
+ * layers. Dropout is also often not applied to output layers.
+ *
+ * See: "Multiplicative Gaussian Noise" in Srivastava et al. 2014: Dropout: A Simple Way to Prevent Neural Networks from
+ * Overfitting http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf
+ *
+ * @author Alex Black
+ */
+@Data
+public class GaussianDropout implements IDropout {
+
+ private final double rate;
+ private final ISchedule rateSchedule;
+
+ /**
+ * @param rate Rate parameter, see {@link GaussianDropout}
+ */
+ public GaussianDropout(double rate){
+ this(rate, null);
+ }
+
+ /**
+ * @param rateSchedule Schedule for rate parameter, see {@link GaussianDropout}
+ */
+ public GaussianDropout(ISchedule rateSchedule){
+ this(Double.NaN, rateSchedule);
+ }
+
+ protected GaussianDropout(@JsonProperty("rate") double rate, @JsonProperty("rateSchedule") ISchedule rateSchedule){
+ this.rate = rate;
+ this.rateSchedule = rateSchedule;
+ }
+
+ @Override
+ public INDArray applyDropout(INDArray inputActivations, int iteration, int epoch, boolean inPlace) {
+ double r;
+ if(rateSchedule != null){
+ r = rateSchedule.valueAt(iteration, epoch);
+ } else {
+ r = rate;
+ }
+
+ double stdev = Math.sqrt(r / (1.0 - r));
+
+ INDArray noise = Nd4j.createUninitialized(inputActivations.shape(), inputActivations.ordering());
+ Nd4j.getExecutioner().exec(new GaussianDistribution(noise, 1.0, stdev));
+
+ if(inPlace){
+ return inputActivations.muli(noise);
+ } else {
+ INDArray result = Nd4j.createUninitialized(inputActivations.shape(), inputActivations.ordering());
+ return Nd4j.getExecutioner().execAndReturn(new MulOp(inputActivations, noise, result));
+ }
+ }
+
+ @Override
+ public GaussianDropout clone() {
+ return new GaussianDropout(rate, rateSchedule == null ? null : rateSchedule.clone());
+ }
+}
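A small worked check of the noise parameterization, matching the stdev computed in applyDropout above (the rate value is illustrative):

    //rate = 0.25 -> y ~ N(1, stdev) with stdev = sqrt(0.25 / 0.75) ~= 0.577
    double rate = 0.25;
    double stdev = Math.sqrt(rate / (1.0 - rate));
    IDropout gd = new GaussianDropout(rate);    //Applied as x <- x * y at training time only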
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianNoise.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianNoise.java
new file mode 100644
index 000000000000..c42efd6030ec
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianNoise.java
@@ -0,0 +1,64 @@
+package org.deeplearning4j.nn.conf.dropout;
+
+import lombok.Data;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.ops.random.impl.GaussianDistribution;
+import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.schedule.ISchedule;
+import org.nd4j.shade.jackson.annotation.JsonProperty;
+
+/**
+ * Applies additive, mean-zero Gaussian noise to the input - i.e., x = x + N(0,stddev).
+ * Note that this differs from {@link GaussianDropout}, which applies multiplicative mean-1 N(1,s) noise.
+ * Note that schedules for the standard deviation value can also be used.
+ *
+ * @author Alex Black
+ */
+@Data
+public class GaussianNoise implements IDropout {
+
+ private double stddev;
+ private ISchedule stddevSchedule;
+
+ /**
+ * @param stddev Standard deviation for the mean 0 Gaussian noise
+ */
+ public GaussianNoise(double stddev){
+ this(stddev, null);
+ }
+
+ /**
+ * @param stddevSchedule Schedule for standard deviation for the mean 0 Gaussian noise
+ */
+ public GaussianNoise(ISchedule stddevSchedule){
+ this(Double.NaN, stddevSchedule);
+ }
+
+ protected GaussianNoise(@JsonProperty("stddev") double stddev, @JsonProperty("stddevSchedule") ISchedule stddevSchedule){
+ this.stddev = stddev;
+ this.stddevSchedule = stddevSchedule;
+ }
+
+ @Override
+ public INDArray applyDropout(INDArray inputActivations, int iteration, int epoch, boolean inPlace) {
+ double currS;
+ if(stddevSchedule != null){
+ currS = stddevSchedule.valueAt(iteration, epoch);
+ } else {
+ currS = stddev;
+ }
+
+ INDArray result = inPlace ? inputActivations : inputActivations.dup(inputActivations.ordering());
+ INDArray noise = Nd4j.createUninitialized(inputActivations.shape(), inputActivations.ordering());
+ Nd4j.getExecutioner().exec(new GaussianDistribution(noise, 0, currS));
+
+ result.addi(noise);
+
+ return result;
+ }
+
+ @Override
+ public GaussianNoise clone() {
+ return new GaussianNoise(stddev, stddevSchedule == null ? null : stddevSchedule.clone());
+ }
+}
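
Usage mirrors the other IDropout implementations. A hedged sketch with a fixed standard deviation; an ISchedule could be passed to the other constructor to decay the noise over training.

    import org.deeplearning4j.nn.conf.dropout.GaussianNoise;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;

    public class GaussianNoiseSketch {
        public static DenseLayer layer() {
            //Additive noise: x <- x + N(0, 0.1), applied at training time only
            return new DenseLayer.Builder()
                    .nIn(10).nOut(10)
                    .dropOut(new GaussianNoise(0.1))
                    .build();
        }
    }
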
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/IDropout.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/IDropout.java
new file mode 100644
index 000000000000..a344d79cae60
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/IDropout.java
@@ -0,0 +1,29 @@
+package org.deeplearning4j.nn.conf.dropout;
+
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.shade.jackson.annotation.JsonTypeInfo;
+
+import java.io.Serializable;
+
+/**
+ * IDropout instances operate on an activations array, modifying or dropping values at training time only.
+ * IDropout instances are not applied at test time.
+ *
+ * @author Alex Black
+ */
+@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class")
+public interface IDropout extends Serializable, Cloneable {
+
+ /**
+ *
+ * @param inputActivations Input activations array
+ * @param iteration Current iteration number
+ * @param epoch Current epoch number
+ * @param inPlace If true: modify the input activations in-place. False: Copy the input activations and
+ * apply dropout on the copy instead
+ * @return The activations array after dropout has been applied: the input array itself if inPlace == true, otherwise a copy
+ */
+ INDArray applyDropout(INDArray inputActivations, int iteration, int epoch, boolean inPlace);
+
+ IDropout clone();
+}
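
To illustrate the contract (a hypothetical class, not part of DL4J): an implementation must honor the inPlace flag and return the modified activations; calling it only at training time is the framework's responsibility. A real implementation would also need Jackson-friendly construction for JSON round-trips.

    import org.deeplearning4j.nn.conf.dropout.IDropout;
    import org.nd4j.linalg.api.ndarray.INDArray;

    public class HalvingDropout implements IDropout {
        @Override
        public INDArray applyDropout(INDArray inputActivations, int iteration, int epoch, boolean inPlace) {
            //Honor the inPlace contract: copy unless in-place modification is allowed
            INDArray result = inPlace ? inputActivations : inputActivations.dup(inputActivations.ordering());
            return result.muli(0.5); //scale every activation by 0.5
        }

        @Override
        public HalvingDropout clone() {
            return new HalvingDropout(); //stateless, so a fresh instance suffices
        }
    }
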
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java
index 003f89ea18fb..d433bb8698a8 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java
@@ -69,24 +69,6 @@ public double getL2ByParam(String paramName) {
}
}
- @Override
- public double getLearningRateByParam(String paramName) {
- switch (paramName) {
- case LSTMParamInitializer.INPUT_WEIGHT_KEY:
- case LSTMParamInitializer.RECURRENT_WEIGHT_KEY:
- return learningRate;
- case LSTMParamInitializer.BIAS_KEY:
- if (!Double.isNaN(biasLearningRate)) {
- //Bias learning rate has been explicitly set
- return biasLearningRate;
- } else {
- return learningRate;
- }
- default:
- throw new IllegalArgumentException("Unknown parameter name: \"" + paramName + "\"");
- }
- }
-
@AllArgsConstructor
@NoArgsConstructor
 public static abstract class Builder<T extends Builder<T>> extends BaseRecurrentLayer.Builder<T> {
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java
index c310d66b98c7..1e560d901455 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java
@@ -100,12 +100,6 @@ public LayerMemoryReport getMemoryReport(InputType inputType) {
.build();
}
- @Override
- public double getLearningRateByParam(String paramName) {
- //Not applicable
- return 0;
- }
-
@Override
public void setNIn(InputType inputType, boolean override) {
//No op
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java
index 043fa77040b7..ffd4f5afed80 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java
@@ -83,7 +83,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) {
int updaterStateSize = (int) getIUpdater().stateSize(numParams);
int trainSizePerEx = 0;
- if (getDropOut() > 0) {
+ if (getIDropout() != null) {
if (false) {
//TODO drop connect
//Dup the weights... note that this does NOT depend on the minibatch size...
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java
index 018eea28dcb7..54ed891bb0eb 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java
@@ -25,13 +25,13 @@
import org.deeplearning4j.nn.conf.LearningRatePolicy;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.Distribution;
+import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.learning.config.IUpdater;
import java.io.Serializable;
-import java.util.HashMap;
import java.util.Map;
/**
@@ -45,34 +45,13 @@ public abstract class BaseLayer extends Layer implements Serializable, Cloneable
protected WeightInit weightInit;
protected double biasInit;
protected Distribution dist;
- protected double learningRate;
- protected double biasLearningRate;
- //learning rate after n iterations
- protected Map<Integer, Double> learningRateSchedule;
- @Deprecated
- protected double momentum;
- //momentum after n iterations
- @Deprecated
- protected Map<Integer, Double> momentumSchedule;
protected double l1;
protected double l2;
protected double l1Bias;
protected double l2Bias;
- @Deprecated
- protected Updater updater;
protected IUpdater iUpdater;
- //adadelta - weight for how much to consider previous history
- @Deprecated
- protected double rho;
- //Epsilon value for adagrad and adadelta
- @Deprecated
- protected double epsilon;
- @Deprecated
- protected double rmsDecay;
- @Deprecated
- protected double adamMeanDecay;
- @Deprecated
- protected double adamVarDecay;
+ protected IUpdater biasUpdater;
+ protected IWeightNoise weightNoise;
protected GradientNormalization gradientNormalization = GradientNormalization.None; //Clipping, rescale based on l2 norm, etc
protected double gradientNormalizationThreshold = 1.0; //Threshold for l2 and element-wise gradient clipping
@@ -84,24 +63,15 @@ public BaseLayer(Builder builder) {
this.weightInit = builder.weightInit;
this.biasInit = builder.biasInit;
this.dist = builder.dist;
- this.learningRate = builder.learningRate;
- this.biasLearningRate = builder.biasLearningRate;
- this.learningRateSchedule = builder.learningRateSchedule;
- this.momentum = builder.momentum;
- this.momentumSchedule = builder.momentumAfter;
this.l1 = builder.l1;
this.l2 = builder.l2;
this.l1Bias = builder.l1Bias;
this.l2Bias = builder.l2Bias;
- this.updater = builder.updater;
this.iUpdater = builder.iupdater;
- this.rho = builder.rho;
- this.epsilon = builder.epsilon;
- this.rmsDecay = builder.rmsDecay;
- this.adamMeanDecay = builder.adamMeanDecay;
- this.adamVarDecay = builder.adamVarDecay;
+ this.biasUpdater = builder.biasUpdater;
this.gradientNormalization = builder.gradientNormalization;
this.gradientNormalizationThreshold = builder.gradientNormalizationThreshold;
+ this.weightNoise = builder.weightNoise;
}
/**
@@ -111,25 +81,16 @@ public BaseLayer(Builder builder) {
*/
public void resetLayerDefaultConfig() {
//clear the learning related params for all layers in the origConf and set to defaults
- this.setUpdater(null);
this.setIUpdater(null);
- this.setMomentum(Double.NaN);
this.setWeightInit(null);
this.setBiasInit(Double.NaN);
this.setDist(null);
- this.setLearningRate(Double.NaN);
- this.setBiasLearningRate(Double.NaN);
- this.setLearningRateSchedule(null);
- this.setMomentumSchedule(null);
this.setL1(Double.NaN);
this.setL2(Double.NaN);
- this.setRho(Double.NaN);
- this.setEpsilon(Double.NaN);
- this.setRmsDecay(Double.NaN);
- this.setAdamMeanDecay(Double.NaN);
- this.setAdamVarDecay(Double.NaN);
this.setGradientNormalization(GradientNormalization.None);
this.setGradientNormalizationThreshold(1.0);
+ this.iUpdater = null;
+ this.biasUpdater = null;
}
@Override
@@ -137,27 +98,9 @@ public BaseLayer clone() {
BaseLayer clone = (BaseLayer) super.clone();
if (clone.dist != null)
clone.dist = clone.dist.clone();
- if (clone.learningRateSchedule != null)
- clone.learningRateSchedule = new HashMap<>(clone.learningRateSchedule);
- if (clone.momentumSchedule != null)
- clone.momentumSchedule = new HashMap<>(clone.momentumSchedule);
return clone;
}
- /**
- * Get the updater for the given parameter. Typically the same updater will be used for all updaters, but this
- * is not necessarily the case
- *
- * @param paramName Parameter name
- * @return Updater for the parameter
- * @deprecated Use {@link #getIUpdaterByParam(String)}
- */
- @Deprecated
- @Override
- public Updater getUpdaterByParam(String paramName) {
- return updater;
- }
-
/**
 * Get the updater for the given parameter. Typically the same updater will be used for all parameters, but this
* is not necessarily the case
@@ -166,7 +109,10 @@ public Updater getUpdaterByParam(String paramName) {
* @return IUpdater for the parameter
*/
@Override
- public IUpdater getIUpdaterByParam(String paramName) {
+ public IUpdater getUpdaterByParam(String paramName) {
+ if(biasUpdater != null && initializer().isBiasParam(paramName)){
+ return biasUpdater;
+ }
return iUpdater;
}
@@ -176,46 +122,15 @@ public abstract static class Builder<T extends Builder<T>> extends Layer.Builder<T>
protected WeightInit weightInit = null;
protected double biasInit = Double.NaN;
protected Distribution dist = null;
- protected double learningRate = Double.NaN;
- protected double biasLearningRate = Double.NaN;
- protected Map<Integer, Double> learningRateSchedule = null;
- @Deprecated
- protected double momentum = Double.NaN;
- @Deprecated
- protected Map<Integer, Double> momentumAfter = null;
protected double l1 = Double.NaN;
protected double l2 = Double.NaN;
protected double l1Bias = Double.NaN;
protected double l2Bias = Double.NaN;
- @Deprecated
- protected Updater updater = null;
protected IUpdater iupdater = null;
- @Deprecated
- protected double rho = Double.NaN;
- @Deprecated
- protected double epsilon = Double.NaN;
- @Deprecated
- protected double rmsDecay = Double.NaN;
- @Deprecated
- protected double adamMeanDecay = Double.NaN;
- @Deprecated
- protected double adamVarDecay = Double.NaN;
+ protected IUpdater biasUpdater = null;
protected GradientNormalization gradientNormalization = null;
protected double gradientNormalizationThreshold = Double.NaN;
- protected LearningRatePolicy learningRatePolicy = null;
-
-
- /**
- * Layer activation function.
- * Typical values include:
- * "relu" (rectified linear), "tanh", "sigmoid", "softmax",
- * "hardtanh", "leakyrelu", "maxout", "softsign", "softplus"
- * @deprecated Use {@link #activation(Activation)} or {@link @activation(IActivation)}
- */
- @Deprecated
- public T activation(String activationFunction) {
- return activation(Activation.fromString(activationFunction));
- }
+ protected IWeightNoise weightNoise;
/**
* Set the activation function for the layer. This overload can be used for custom {@link IActivation} instances
@@ -265,30 +180,6 @@ public T dist(Distribution dist) {
return (T) this;
}
- /**
- * Learning rate. Defaults to 1e-1
- */
- public T learningRate(double learningRate) {
- this.learningRate = learningRate;
- return (T) this;
- }
-
- /**
- * Bias learning rate. Set this to apply a different learning rate to the bias
- */
- public T biasLearningRate(double biasLearningRate) {
- this.biasLearningRate = biasLearningRate;
- return (T) this;
- }
-
- /**
- * Learning rate schedule. Map of the iteration to the learning rate to apply at that iteration.
- */
- public T learningRateSchedule(Map<Integer, Double> learningRateSchedule) {
- this.learningRateSchedule = learningRateSchedule;
- return (T) this;
- }
-
/**
* L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 regularization
* coefficient for the bias.
@@ -323,32 +214,13 @@ public T l2Bias(double l2Bias) {
return (T) this;
}
- /**
- * Momentum rate.
- * @deprecated Use {@code .updater(new Nesterov(momentum))} instead
- */
- @Deprecated
- public T momentum(double momentum) {
- this.momentum = momentum;
- return (T) this;
- }
-
- /**
- * Momentum schedule. Map of the iteration to the momentum rate to apply at that iteration.
- * @deprecated Use {@code .updater(Nesterov.builder().momentumSchedule(schedule).build())} instead
- */
- @Deprecated
- public T momentumAfter(Map<Integer, Double> momentumAfter) {
- this.momentumAfter = momentumAfter;
- return (T) this;
- }
-
/**
* Gradient updater. For example, SGD for standard stochastic gradient descent, NESTEROV for Nesterov momentum,
 * RMSPROP for RMSProp, etc.
*
* @see Updater
*/
+ @Deprecated
public T updater(Updater updater) {
return updater(updater.getIUpdaterWithDefaultConfig());
}
@@ -365,56 +237,13 @@ public T updater(IUpdater updater) {
}
/**
- * Ada delta coefficient, rho. Only applies if using .updater(Updater.ADADELTA)
- *
- * @param rho
- * @deprecated use {@code .updater(new AdaDelta(rho,epsilon))} intead
- */
- @Deprecated
- public T rho(double rho) {
- this.rho = rho;
- return (T) this;
- }
-
- /**
- * Decay rate for RMSProp. Only applies if using .updater(Updater.RMSPROP)
- * @deprecated use {@code .updater(new RmsProp(rmsDecay))} instead
- */
- @Deprecated
- public T rmsDecay(double rmsDecay) {
- this.rmsDecay = rmsDecay;
- return (T) this;
- }
-
- /**
- * Epsilon value for updaters: Adam, RMSProp, Adagrad, Adadelta
+ * Gradient updater configuration, for the biases only. If not set, biases will use the updater as
+ * set by {@link #updater(IUpdater)}
*
- * @param epsilon Epsilon value to use
- * @deprecated Use use {@code .updater(Adam.builder().epsilon(epsilon).build())} or similar instead
- */
- @Deprecated
- public T epsilon(double epsilon) {
- this.epsilon = epsilon;
- return (T) this;
- }
-
- /**
- * Mean decay rate for Adam updater. Only applies if using .updater(Updater.ADAM)
- * @deprecated use {@code .updater(Adam.builder().beta1(adamMeanDecay).build())} intead
- */
- @Deprecated
- public T adamMeanDecay(double adamMeanDecay) {
- this.adamMeanDecay = adamMeanDecay;
- return (T) this;
- }
-
- /**
- * Variance decay rate for Adam updater. Only applies if using .updater(Updater.ADAM)
- * @deprecated use {@code .updater(Adam.builder().beta2(adamVarDecay).build())} intead
+ * @param biasUpdater Updater to use for bias parameters
*/
- @Deprecated
- public T adamVarDecay(double adamVarDecay) {
- this.adamVarDecay = adamVarDecay;
+ public T biasUpdater(IUpdater biasUpdater){
+ this.biasUpdater = biasUpdater;
return (T) this;
}
@@ -441,14 +270,14 @@ public T gradientNormalizationThreshold(double threshold) {
}
/**
- * Learning rate decay policy. Used to adapt learning rate based on policy.
+ * Set the weight noise (such as {@link org.deeplearning4j.nn.conf.weightnoise.DropConnect} and
+ * {@link org.deeplearning4j.nn.conf.weightnoise.WeightNoise}) for this layer
*
- * @param policy Type of policy to use. Defaults to None.
- * @see GradientNormalization
+ * @param weightNoise Weight noise instance to use
*/
- public T learningRateDecayPolicy(LearningRatePolicy policy) {
- this.learningRatePolicy = policy;
- return (T) this;
+ public T weightNoise(IWeightNoise weightNoise){
+ this.weightNoise = weightNoise;
+ return (T)this;
}
}
}
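
A sketch (values illustrative, not part of the patch) of the new per-layer options that replace the removed learning-rate/momentum fields; the DropConnect constructor argument is assumed to be the weight retain probability.

    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.conf.weightnoise.DropConnect;
    import org.nd4j.linalg.learning.config.Adam;
    import org.nd4j.linalg.learning.config.Sgd;

    public class BaseLayerBuilderSketch {
        public static DenseLayer layer() {
            return new DenseLayer.Builder()
                    .nIn(10).nOut(10)
                    .updater(new Adam(1e-3))           //weights (and biases, unless overridden)
                    .biasUpdater(new Sgd(1e-2))        //biases only, per getUpdaterByParam above
                    .weightNoise(new DropConnect(0.9)) //assumed: weight retain probability 0.9
                    .build();
        }
    }
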
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java
index d9b40feb2925..fa35859c3e1d 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java
@@ -1,12 +1,12 @@
package org.deeplearning4j.nn.conf.layers;
-import lombok.*;
-import org.deeplearning4j.nn.api.ParamInitializer;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
+import lombok.ToString;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.conf.memory.MemoryReport;
-import org.deeplearning4j.nn.params.DefaultParamInitializer;
-import org.deeplearning4j.nn.params.EmptyParamInitializer;
import org.nd4j.linalg.lossfunctions.ILossFunction;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
import org.nd4j.linalg.lossfunctions.impl.LossBinaryXENT;
@@ -63,7 +63,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) {
int trainSizeFixed = 0;
int trainSizeVariable = 0;
- if (getDropOut() > 0) {
+ if (getIDropout() != null) {
if (false) {
//TODO drop connect
//Dup the weights... note that this does NOT depend on the minibatch size...
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BasePretrainNetwork.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BasePretrainNetwork.java
index cd9d15c43fc8..87850ab4519c 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BasePretrainNetwork.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BasePretrainNetwork.java
@@ -71,30 +71,6 @@ public double getL2ByParam(String paramName) {
}
}
- @Override
- public double getLearningRateByParam(String paramName) {
- switch (paramName) {
- case PretrainParamInitializer.WEIGHT_KEY:
- return learningRate;
- case PretrainParamInitializer.BIAS_KEY:
- if (!Double.isNaN(biasLearningRate)) {
- //Bias learning rate has been explicitly set
- return biasLearningRate;
- } else {
- return learningRate;
- }
- case PretrainParamInitializer.VISIBLE_BIAS_KEY:
- if (!Double.isNaN(biasLearningRate)) {
- //Bias learning rate has been explicitly set
- return biasLearningRate;
- } else {
- return learningRate;
- }
- default:
- throw new IllegalArgumentException("Unknown parameter name: \"" + paramName + "\"");
- }
- }
-
@Override
public boolean isPretrainParam(String paramName) {
return PretrainParamInitializer.VISIBLE_BIAS_KEY.equals(paramName);
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java
index d8feb7125c83..232867441e1a 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java
@@ -1,6 +1,9 @@
package org.deeplearning4j.nn.conf.layers;
-import lombok.*;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
+import lombok.ToString;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java
new file mode 100644
index 000000000000..1e9c09d0523d
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java
@@ -0,0 +1,110 @@
+/*-
+ *
+ * * Copyright 2017 Skymind,Inc.
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+package org.deeplearning4j.nn.conf.layers;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
+import lombok.ToString;
+import org.deeplearning4j.nn.api.ParamInitializer;
+import org.deeplearning4j.nn.conf.InputPreProcessor;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
+import org.deeplearning4j.nn.conf.memory.MemoryReport;
+import org.deeplearning4j.nn.params.EmptyParamInitializer;
+import org.deeplearning4j.optimize.api.IterationListener;
+import org.nd4j.linalg.api.ndarray.INDArray;
+
+import java.util.Collection;
+import java.util.Map;
+
+/**
+ * Upsampling base layer
+ *
+ * @author Max Pumperla
+ */
+
+@Data
+@NoArgsConstructor
+@ToString(callSuper = true)
+@EqualsAndHashCode(callSuper = true)
+public abstract class BaseUpsamplingLayer extends Layer {
+
+ protected int size;
+
+ protected BaseUpsamplingLayer(UpsamplingBuilder builder) {
+ super(builder);
+ this.size = builder.size;
+ }
+
+ @Override
+ public BaseUpsamplingLayer clone() {
+ BaseUpsamplingLayer clone = (BaseUpsamplingLayer) super.clone();
+ return clone;
+ }
+
+ @Override
+ public ParamInitializer initializer() {
+ return EmptyParamInitializer.getInstance();
+ }
+
+
+ @Override
+ public void setNIn(InputType inputType, boolean override) {
+ //No op: upsampling layer doesn't have nIn value
+ }
+
+ @Override
+ public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
+ if (inputType == null) {
+ throw new IllegalStateException("Invalid input for Upsampling layer (layer name=\"" + getLayerName()
+ + "\"): input is null");
+ }
+ return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName());
+ }
+
+ @Override
+ public double getL1ByParam(String paramName) {
+ //Not applicable
+ return 0;
+ }
+
+ @Override
+ public double getL2ByParam(String paramName) {
+ //Not applicable
+ return 0;
+ }
+
+ @Override
+ public boolean isPretrainParam(String paramName) {
+ throw new UnsupportedOperationException("UpsamplingLayer does not contain parameters");
+ }
+
+
+ @NoArgsConstructor
+ protected static abstract class UpsamplingBuilder<T extends UpsamplingBuilder<T>>
+ extends Layer.Builder<T> {
+ protected int size = 1;
+
+ protected UpsamplingBuilder(int size) {
+ this.size = size;
+ }
+ }
+
+}
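
Concrete subclasses supply the size via UpsamplingBuilder. Assuming the Upsampling2D subclass registered with Jackson later in this patch exposes a Builder(int size), usage would look like:

    import org.deeplearning4j.nn.conf.layers.Upsampling2D;

    public class UpsamplingSketch {
        public static Upsampling2D layer() {
            //Repeat each spatial element of the input 2x along height and width
            return new Upsampling2D.Builder(2).build();
        }
    }
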
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java
index e75a636bdfd9..7f80569761b4 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java
@@ -6,7 +6,6 @@
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.conf.memory.MemoryReport;
@@ -17,7 +16,10 @@
import org.nd4j.linalg.learning.config.IUpdater;
import org.nd4j.linalg.learning.config.NoOp;
-import java.util.*;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
/**
* Batch normalization configuration
@@ -139,35 +141,7 @@ public double getL2ByParam(String paramName) {
}
@Override
- public double getLearningRateByParam(String paramName) {
- switch (paramName) {
- case BatchNormalizationParamInitializer.BETA:
- case BatchNormalizationParamInitializer.GAMMA:
- return learningRate;
- case BatchNormalizationParamInitializer.GLOBAL_MEAN:
- case BatchNormalizationParamInitializer.GLOBAL_VAR:
- return 0.0;
- default:
- throw new IllegalArgumentException("Unknown parameter: \"" + paramName + "\"");
- }
- }
-
- @Override
- public Updater getUpdaterByParam(String paramName) {
- switch (paramName) {
- case BatchNormalizationParamInitializer.BETA:
- case BatchNormalizationParamInitializer.GAMMA:
- return updater;
- case BatchNormalizationParamInitializer.GLOBAL_MEAN:
- case BatchNormalizationParamInitializer.GLOBAL_VAR:
- return Updater.NONE;
- default:
- throw new IllegalArgumentException("Unknown parameter: \"" + paramName + "\"");
- }
- }
-
- @Override
- public IUpdater getIUpdaterByParam(String paramName) {
+ public IUpdater getUpdaterByParam(String paramName) {
switch (paramName) {
case BatchNormalizationParamInitializer.BETA:
case BatchNormalizationParamInitializer.GAMMA:
@@ -190,7 +164,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) {
int updaterStateSize = 0;
for (String s : BatchNormalizationParamInitializer.keys()) {
- updaterStateSize += getIUpdaterByParam(s).stateSize(nOut);
+ updaterStateSize += getUpdaterByParam(s).stateSize(nOut);
}
//During forward pass: working memory size approx. equal to 2x input size (copy ops, etc)
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java
index 8b8c2263f8f8..e990dcabb69f 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java
@@ -25,13 +25,11 @@
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.conf.memory.MemoryReport;
import org.deeplearning4j.nn.params.CenterLossParamInitializer;
import org.deeplearning4j.optimize.api.IterationListener;
-import org.deeplearning4j.util.LayerValidation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.learning.config.IUpdater;
import org.nd4j.linalg.learning.config.NoOp;
@@ -91,19 +89,7 @@ public ParamInitializer initializer() {
}
@Override
- @Deprecated
- public Updater getUpdaterByParam(String paramName) {
- // center loss utilizes alpha directly for this so any updater can be used for other layers
- switch (paramName) {
- case CenterLossParamInitializer.CENTER_KEY:
- return Updater.NONE;
- default:
- return updater;
- }
- }
-
- @Override
- public IUpdater getIUpdaterByParam(String paramName) {
+ public IUpdater getUpdaterByParam(String paramName) {
// center loss utilizes alpha directly for this so any updater can be used for other layers
switch (paramName) {
case CenterLossParamInitializer.CENTER_KEY:
@@ -113,25 +99,6 @@ public IUpdater getIUpdaterByParam(String paramName) {
}
}
- @Override
- public double getLearningRateByParam(String paramName) {
- switch (paramName) {
- case CenterLossParamInitializer.WEIGHT_KEY:
- return learningRate;
- case CenterLossParamInitializer.BIAS_KEY:
- if (!Double.isNaN(biasLearningRate)) {
- //Bias learning rate has been explicitly set
- return biasLearningRate;
- } else {
- return learningRate;
- }
- case CenterLossParamInitializer.CENTER_KEY:
- return 0;
- default:
- throw new IllegalStateException("Unknown parameter: \"" + paramName + "\"");
- }
- }
-
@Override
public double getL1ByParam(String paramName) {
switch (paramName) {
@@ -182,13 +149,13 @@ public LayerMemoryReport getMemoryReport(InputType inputType) {
int nParamsCenter = nIn * nOut;
int numParams = nParamsW + nParamsB + nParamsCenter;
- int updaterStateSize = (int) (getIUpdaterByParam(CenterLossParamInitializer.WEIGHT_KEY).stateSize(nParamsW)
- + getIUpdaterByParam(CenterLossParamInitializer.BIAS_KEY).stateSize(nParamsB)
- + getIUpdaterByParam(CenterLossParamInitializer.CENTER_KEY).stateSize(nParamsCenter));
+ int updaterStateSize = (int) (getUpdaterByParam(CenterLossParamInitializer.WEIGHT_KEY).stateSize(nParamsW)
+ + getUpdaterByParam(CenterLossParamInitializer.BIAS_KEY).stateSize(nParamsB)
+ + getUpdaterByParam(CenterLossParamInitializer.CENTER_KEY).stateSize(nParamsCenter));
int trainSizeFixed = 0;
int trainSizeVariable = 0;
- if (getDropOut() > 0) {
+ if (getIDropout() != null) {
if (false) {
//TODO drop connect
//Dup the weights... note that this does NOT depend on the minibatch size...
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java
index d53647b0e086..f1ba82a4ba17 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java
@@ -21,25 +21,6 @@
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.ToString;
-import org.deeplearning4j.nn.api.Layer;
-import org.deeplearning4j.nn.api.ParamInitializer;
-import org.deeplearning4j.nn.conf.*;
-import org.deeplearning4j.nn.conf.distribution.Distribution;
-import org.deeplearning4j.nn.conf.inputs.InputType;
-import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
-import org.deeplearning4j.nn.conf.memory.MemoryReport;
-import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
-import org.deeplearning4j.nn.weights.WeightInit;
-import org.deeplearning4j.optimize.api.IterationListener;
-import org.deeplearning4j.util.ConvolutionUtils;
-import org.deeplearning4j.util.LayerValidation;
-import org.nd4j.linalg.activations.Activation;
-import org.nd4j.linalg.activations.IActivation;
-import org.nd4j.linalg.api.ndarray.INDArray;
-
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
/**
* 1D convolution layer
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java
index bb0b004baafe..66691762b646 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java
@@ -4,11 +4,9 @@
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.ToString;
-import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
-import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
import org.deeplearning4j.optimize.api.IterationListener;
import org.deeplearning4j.util.ConvolutionUtils;
import org.nd4j.linalg.api.ndarray.INDArray;
@@ -45,7 +43,7 @@ private Convolution1DLayer(Builder builder) {
public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
 Collection<IterationListener> iterationListeners, int layerIndex, INDArray layerParamsView,
boolean initializeParams) {
- org.deeplearning4j.util.LayerValidation.assertNInNOutSet("Convolution1DLayer", getLayerName(), layerIndex,
+ LayerValidation.assertNInNOutSet("Convolution1DLayer", getLayerName(), layerIndex,
getNIn(), getNOut());
org.deeplearning4j.nn.layers.convolution.Convolution1DLayer ret =
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java
index ec89f752399f..7b1d8ac8d40b 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java
@@ -21,25 +21,6 @@
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.ToString;
-import org.deeplearning4j.nn.api.Layer;
-import org.deeplearning4j.nn.api.ParamInitializer;
-import org.deeplearning4j.nn.conf.*;
-import org.deeplearning4j.nn.conf.distribution.Distribution;
-import org.deeplearning4j.nn.conf.inputs.InputType;
-import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
-import org.deeplearning4j.nn.conf.memory.MemoryReport;
-import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
-import org.deeplearning4j.nn.weights.WeightInit;
-import org.deeplearning4j.optimize.api.IterationListener;
-import org.deeplearning4j.util.ConvolutionUtils;
-import org.deeplearning4j.util.LayerValidation;
-import org.nd4j.linalg.activations.Activation;
-import org.nd4j.linalg.activations.IActivation;
-import org.nd4j.linalg.api.ndarray.INDArray;
-
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
/**
* 2D convolution layer
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java
index 919be8fb7e03..8a6a50b57c4f 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java
@@ -1,6 +1,9 @@
package org.deeplearning4j.nn.conf.layers;
-import lombok.*;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
+import lombok.ToString;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.*;
@@ -12,7 +15,6 @@
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.api.IterationListener;
import org.deeplearning4j.util.ConvolutionUtils;
-import org.deeplearning4j.util.LayerValidation;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.ndarray.INDArray;
@@ -202,23 +204,6 @@ public double getL2ByParam(String paramName) {
}
}
- @Override
- public double getLearningRateByParam(String paramName) {
- switch (paramName) {
- case ConvolutionParamInitializer.WEIGHT_KEY:
- return learningRate;
- case ConvolutionParamInitializer.BIAS_KEY:
- if (!Double.isNaN(biasLearningRate)) {
- //Bias learning rate has been explicitly set
- return biasLearningRate;
- } else {
- return learningRate;
- }
- default:
- throw new IllegalArgumentException("Unknown parameter name: \"" + paramName + "\"");
- }
- }
-
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
int paramSize = initializer().numParams(this);
@@ -252,7 +237,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) {
trainWorkingSizePerEx = im2colSizePerEx;
}
- if (getDropOut() > 0) {
+ if (getIDropout() != null) {
//Dup on the input before dropout, but only for training
trainWorkingSizePerEx += inputType.arrayElementsPerExample();
}
@@ -377,36 +362,6 @@ public Builder dist(Distribution dist) {
return this;
}
- /**
- * Learning rate. Defaults to 1e-1
- *
- * @param learningRate
- */
- @Override
- public Builder learningRate(double learningRate) {
- return super.learningRate(learningRate);
- }
-
- /**
- * Bias learning rate. Set this to apply a different learning rate to the bias
- *
- * @param biasLearningRate
- */
- @Override
- public Builder biasLearningRate(double biasLearningRate) {
- return super.biasLearningRate(biasLearningRate);
- }
-
- /**
- * Learning rate schedule. Map of the iteration to the learning rate to apply at that iteration.
- *
- * @param learningRateSchedule
- */
- @Override
- public Builder learningRateSchedule(Map<Integer, Double> learningRateSchedule) {
- return super.learningRateSchedule(learningRateSchedule);
- }
-
/**
* L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 regularization
* coefficient for the bias.
@@ -449,26 +404,6 @@ public Builder l2Bias(double l2Bias) {
return super.l2Bias(l2Bias);
}
- /**
- * Momentum rate.
- *
- * @param momentum
- */
- @Override
- public Builder momentum(double momentum) {
- return super.momentum(momentum);
- }
-
- /**
- * Momentum schedule. Map of the iteration to the momentum rate to apply at that iteration.
- *
- * @param momentumAfter
- */
- @Override
- public Builder momentumAfter(Map<Integer, Double> momentumAfter) {
- return super.momentumAfter(momentumAfter);
- }
-
/**
* Gradient updater. For example, SGD for standard stochastic gradient descent, NESTEROV for Nesterov momentum,
 * RMSPROP for RMSProp, etc.
@@ -477,61 +412,11 @@ public Builder momentumAfter(Map<Integer, Double> momentumAfter) {
* @see Updater
*/
@Override
+ @Deprecated
public Builder updater(Updater updater) {
return super.updater(updater);
}
- /**
- * Ada delta coefficient, rho. Only applies if using .updater(Updater.ADADELTA)
- *
- * @param rho
- */
- @Override
- public Builder rho(double rho) {
- return super.rho(rho);
- }
-
- /**
- * Decay rate for RMSProp. Only applies if using .updater(Updater.RMSPROP)
- *
- * @param rmsDecay
- */
- @Override
- public Builder rmsDecay(double rmsDecay) {
- return super.rmsDecay(rmsDecay);
- }
-
- /**
- * Epsilon value for updaters: Adagrad and Adadelta. Only used if using Updater.ADAGRAD or Updater.ADADELTA
- *
- * @param epsilon Epsilon value to use for adagrad and adadelta
- */
- @Override
- public Builder epsilon(double epsilon) {
- return super.epsilon(epsilon);
- }
-
- /**
- * Mean decay rate for Adam updater. Only applies if using .updater(Updater.ADAM)
- *
- * @param adamMeanDecay
- */
- @Override
- public Builder adamMeanDecay(double adamMeanDecay) {
- return super.adamMeanDecay(adamMeanDecay);
- }
-
- /**
- * Variance decay rate for Adam updater. Only applies if using .updater(Updater.ADAM)
- *
- * @param adamVarDecay
- */
- @Override
- public Builder adamVarDecay(double adamVarDecay) {
- super.adamVarDecay(adamVarDecay);
- return this;
- }
-
/**
* Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping etc.
*
@@ -558,18 +443,6 @@ public Builder gradientNormalizationThreshold(double threshold) {
return this;
}
- /**
- * Learning rate decay policy. Used to adapt learning rate based on policy.
- *
- * @param policy Type of policy to use. Defaults to None.
- * @see GradientNormalization
- */
- @Override
- public Builder learningRateDecayPolicy(LearningRatePolicy policy) {
- super.learningRateDecayPolicy(policy);
- return this;
- }
-
/**
* Size of the convolution
* rows/columns
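
The removed @deprecated javadocs already name the replacements; collected here in one hedged sketch (signatures as those javadocs suggest):

    import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
    import org.nd4j.linalg.learning.config.AdaDelta;
    import org.nd4j.linalg.learning.config.Adam;

    public class UpdaterMigrationSketch {
        public static ConvolutionLayer layer() {
            //.rho(rho).epsilon(eps)                 -> .updater(new AdaDelta(rho, eps))
            //.adamMeanDecay(b1) / .adamVarDecay(b2) -> .updater(Adam.builder().beta1(b1).beta2(b2).build())
            return new ConvolutionLayer.Builder(5, 5)
                    .nIn(1).nOut(20)
                    .updater(new AdaDelta(0.95, 1e-6))
                    .build();
        }
    }
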
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java
index 45237b36e191..81ecabc78f7a 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java
@@ -18,7 +18,10 @@
package org.deeplearning4j.nn.conf.layers;
-import lombok.*;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
+import lombok.ToString;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
@@ -27,7 +30,6 @@
import org.deeplearning4j.nn.conf.memory.MemoryReport;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.deeplearning4j.optimize.api.IterationListener;
-import org.deeplearning4j.util.LayerValidation;
import org.nd4j.linalg.api.ndarray.INDArray;
import java.util.Collection;
@@ -79,7 +81,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) {
int trainSizeFixed = 0;
int trainSizeVariable = 0;
- if (getDropOut() > 0) {
+ if (getIDropout() != null) {
if (false) {
//TODO drop connect
//Dup the weights... note that this does NOT depend on the minibatch size...
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java
index d94479b0576a..1b4d61fabe3a 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java
@@ -7,6 +7,7 @@
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.conf.memory.MemoryReport;
@@ -80,12 +81,6 @@ public double getL2ByParam(String paramName) {
return 0;
}
- @Override
- public double getLearningRateByParam(String paramName) {
- //Not applicable
- return 0;
- }
-
@Override
public boolean isPretrainParam(String paramName) {
throw new UnsupportedOperationException("Dropout layer does not contain parameters");
@@ -107,8 +102,9 @@ public LayerMemoryReport getMemoryReport(InputType inputType) {
@NoArgsConstructor
 public static class Builder extends FeedForwardLayer.Builder<Builder> {
- public Builder(double dropOut) {
- this.dropOut = dropOut;
+
+ public Builder(double dropout){
+ this.dropOut(new Dropout(dropout));
}
@Override
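
With the changed constructor, the primitive overload simply wraps its argument in a Dropout instance, so the two forms below should be equivalent (a sketch, assuming the builder generics resolve as usual):

    import org.deeplearning4j.nn.conf.dropout.Dropout;
    import org.deeplearning4j.nn.conf.layers.DropoutLayer;

    public class DropoutLayerSketch {
        public static void main(String[] args) {
            DropoutLayer a = new DropoutLayer.Builder(0.8).build(); //wraps 0.8 in new Dropout(0.8)
            DropoutLayer b = new DropoutLayer.Builder().dropOut(new Dropout(0.8)).build();
        }
    }
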
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java
index 76896a7dcce1..8b5bbf41d5e2 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java
@@ -1,11 +1,12 @@
package org.deeplearning4j.nn.conf.layers;
-import lombok.*;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
+import lombok.ToString;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.ParamInitializer;
-import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.constraint.BaseConstraint;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.conf.memory.MemoryReport;
@@ -14,9 +15,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import java.util.Collection;
-import java.util.List;
import java.util.Map;
-import java.util.Set;
/**
* Embedding layer: feed-forward layer that expects single integers per example as input (class numbers, in range 0 to numClass-1)
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java
index 657a851bed0d..142257d86178 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java
@@ -106,23 +106,6 @@ public double getL2ByParam(String paramName) {
}
}
- @Override
- public double getLearningRateByParam(String paramName) {
- switch (paramName) {
- case DefaultParamInitializer.WEIGHT_KEY:
- return learningRate;
- case DefaultParamInitializer.BIAS_KEY:
- if (!Double.isNaN(biasLearningRate)) {
- //Bias learning rate has been explicitly set
- return biasLearningRate;
- } else {
- return learningRate;
- }
- default:
- throw new IllegalStateException("Unknown parameter: \"" + paramName + "\"");
- }
- }
-
@Override
public boolean isPretrainParam(String paramName) {
return false; //No pretrain params in standard FF layers
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java
index 4927df3f30c9..549d5b413ade 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java
@@ -150,12 +150,6 @@ public double getL2ByParam(String paramName) {
return 0;
}
- @Override
- public double getLearningRateByParam(String paramName) {
- //Not applicable
- return 0;
- }
-
@Override
public boolean isPretrainParam(String paramName) {
throw new UnsupportedOperationException("Global pooling layer does not contain parameters");
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java
index 91e9b33c5dbe..d5baa079b436 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java
@@ -125,27 +125,6 @@ public double getL2ByParam(String paramName) {
}
}
- @Override
- public double getLearningRateByParam(String paramName) {
- switch (paramName) {
- case GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS:
- case GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS:
- case GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS:
- case GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS:
- return learningRate;
- case GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS:
- case GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS:
- if (!Double.isNaN(biasLearningRate)) {
- //Bias learning rate has been explicitly set
- return biasLearningRate;
- } else {
- return learningRate;
- }
- default:
- throw new IllegalArgumentException("Unknown parameter name: \"" + paramName + "\"");
- }
- }
-
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
return LSTMHelpers.getMemoryReport(this, inputType);
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java
index 7d79b0448bab..daea0f9ef6e3 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java
@@ -28,12 +28,14 @@
import org.deeplearning4j.nn.layers.recurrent.LSTMHelpers;
import org.deeplearning4j.nn.params.GravesLSTMParamInitializer;
import org.deeplearning4j.optimize.api.IterationListener;
-import org.deeplearning4j.util.LayerValidation;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.activations.impl.ActivationSigmoid;
import org.nd4j.linalg.api.ndarray.INDArray;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
/**
* LSTM recurrent net, based on Graves: Supervised Sequence Labelling with Recurrent Neural Networks
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java
index 757b71ede86c..5b58744a8e48 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java
@@ -23,18 +23,19 @@
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.constraint.BaseConstraint;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.layers.recurrent.LSTMHelpers;
import org.deeplearning4j.nn.params.LSTMParamInitializer;
import org.deeplearning4j.optimize.api.IterationListener;
-import org.deeplearning4j.util.LayerValidation;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.activations.impl.ActivationSigmoid;
import org.nd4j.linalg.api.ndarray.INDArray;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
/**
* LSTM recurrent net without peephole connections.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java
index fa100dbb792b..6dc5960c11ba 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java
@@ -24,8 +24,8 @@
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
-import org.deeplearning4j.nn.conf.constraint.BaseConstraint;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
+import org.deeplearning4j.nn.conf.dropout.IDropout;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.misc.FrozenLayer;
import org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer;
@@ -69,21 +69,25 @@
@JsonSubTypes.Type(value = ZeroPaddingLayer.class, name = "zeroPadding"),
@JsonSubTypes.Type(value = ZeroPadding1DLayer.class, name = "zeroPadding1d"),
@JsonSubTypes.Type(value = FrozenLayer.class, name = "FrozenLayer"),
+ @JsonSubTypes.Type(value = Upsampling2D.class, name = "Upsampling2D"),
@JsonSubTypes.Type(value = Yolo2OutputLayer.class, name = "Yolo2OutputLayer")
})
@Data
@NoArgsConstructor
public abstract class Layer implements Serializable, Cloneable {
protected String layerName;
- protected double dropOut;
+ protected IDropout iDropout;
 protected List<LayerConstraint> constraints;
public Layer(Builder builder) {
this.layerName = builder.layerName;
- this.dropOut = builder.dropOut;
+ this.iDropout = builder.iDropout;
}
+ /**
+ * Initialize the layer constraints. Should be called last, in the outermost constructor
+ */
 protected void initializeConstraints(Builder<?> builder){
//Note: this has to be done AFTER all constructors have finished - otherwise the required
 // fields may not be set yet
@@ -111,7 +115,12 @@ protected void initializeConstraints(Builder<?> builder){
allConstraints.add(c2);
}
}
- this.constraints = allConstraints;
+ if(allConstraints.size() > 0) {
+ this.constraints = allConstraints;
+ } else {
+ this.constraints = null;
+ }
+ this.iDropout = builder.iDropout;
}
/**
@@ -121,7 +130,8 @@ protected void initializeConstraints(Builder<?> builder){
*/
public void resetLayerDefaultConfig() {
//clear the learning related params for all layers in the origConf and set to defaults
- this.setDropOut(Double.NaN);
+ this.iDropout = null;
+ this.constraints = null;
}
@Override
@@ -137,6 +147,9 @@ public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfigurati
 Collection<IterationListener> iterationListeners, int layerIndex, INDArray layerParamsView,
boolean initializeParams);
+ /**
+ * @return The parameter initializer for this model
+ */
public abstract ParamInitializer initializer();
/**
@@ -191,16 +204,6 @@ public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfigurati
*/
public abstract double getL2ByParam(String paramName);
- /**
- * Get the (initial) learning rate coefficient for the given parameter.
- * Different parameters may be configured to have different learning rates, though commonly all parameters will
- * have the same learning rate
- *
- * @param paramName Parameter name
- * @return Initial learning rate value for that parameter
- */
- public abstract double getLearningRateByParam(String paramName);
-
/**
* Is the specified parameter a layerwise pretraining only parameter?
* For example, visible bias params in an autoencoder (or, decoder params in a variational autoencoder) aren't
@@ -212,20 +215,6 @@ public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfigurati
*/
public abstract boolean isPretrainParam(String paramName);
- /**
- * Get the updater for the given parameter. Typically the same updater will be used for all updaters, but this
- * is not necessarily the case
- *
- * @param paramName Parameter name
- * @return Updater for the parameter
- * @deprecated Use {@link #getIUpdaterByParam(String)}
- */
- @Deprecated
- public Updater getUpdaterByParam(String paramName) {
- throw new UnsupportedOperationException(
- "Not supported: all layers with parameters should override this method");
- }
-
/**
 * Get the updater for the given parameter. Typically the same updater will be used for all parameters, but this
* is not necessarily the case
@@ -233,7 +222,7 @@ public Updater getUpdaterByParam(String paramName) {
* @param paramName Parameter name
* @return IUpdater for the parameter
*/
- public IUpdater getIUpdaterByParam(String paramName) {
+ public IUpdater getUpdaterByParam(String paramName) {
throw new UnsupportedOperationException(
"Not supported: all layers with parameters should override this method");
}
@@ -249,10 +238,10 @@ public IUpdater getIUpdaterByParam(String paramName) {
@SuppressWarnings("unchecked")
 public abstract static class Builder<T extends Builder<T>> {
protected String layerName = null;
- protected double dropOut = Double.NaN;
 protected List<LayerConstraint> allParamConstraints;
 protected List<LayerConstraint> weightConstraints;
 protected List<LayerConstraint> biasConstraints;
+ protected IDropout iDropout;
/**
* Layer name assigns layer string name.
@@ -284,10 +273,21 @@ public T name(String layerName) {
*
*
* @param inputRetainProbability Dropout probability (probability of retaining each input activation value for a layer)
+ * @see #dropOut(IDropout)
*/
public T dropOut(double inputRetainProbability) {
- this.dropOut = inputRetainProbability;
- return (T) this;
+ return dropOut(new Dropout(inputRetainProbability));
+ }
+
+ /**
+ * Set the dropout for this layer
+ *
+ * @param dropout Dropout, such as {@link Dropout}, {@link org.deeplearning4j.nn.conf.dropout.GaussianDropout},
+ * {@link org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc
+ */
+ public T dropOut(IDropout dropout){
+ this.iDropout = dropout;
+ return (T)this;
}
/**
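
The net effect for callers: dropOut(double) is preserved and now delegates to dropOut(new Dropout(...)), while the new overload accepts any IDropout per layer. A brief sketch:

    import org.deeplearning4j.nn.conf.dropout.GaussianDropout;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;

    public class PerLayerDropoutSketch {
        public static DenseLayer layer() {
            //dropOut(0.5) is now shorthand for dropOut(new Dropout(0.5));
            //any other IDropout can be supplied the same way:
            return new DenseLayer.Builder()
                    .nIn(10).nOut(10)
                    .dropOut(new GaussianDropout(0.5))
                    .build();
        }
    }
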
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java
index 6f15d72be1d7..f7a5efba5d42 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java
@@ -1,16 +1,18 @@
package org.deeplearning4j.nn.conf.layers;
import lombok.extern.slf4j.Slf4j;
+import org.deeplearning4j.exception.DL4JInvalidConfigException;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.Distribution;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
+import org.deeplearning4j.nn.conf.dropout.IDropout;
import org.deeplearning4j.nn.conf.layers.misc.FrozenLayer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.util.OneTimeLogger;
-import org.nd4j.linalg.learning.config.*;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
/**
* Created by Alex on 22/02/2017.
@@ -19,306 +21,47 @@
public class LayerValidation {
/**
- * Validate the updater configuration - setting the default updater values, if necessary
+ * Asserts that the layer nIn and nOut values are set for the layer
+ *
+ * @param layerType Type of layer ("DenseLayer", etc)
+ * @param layerName Name of the layer (may be null if not set)
+ * @param layerIndex Index of the layer
+ * @param nIn nIn value
+ * @param nOut nOut value
*/
- public static void updaterValidation(String layerName, Layer layer, Double learningRate, Double momentum,
- Map<Integer, Double> momentumSchedule, Double adamMeanDecay, Double adamVarDecay, Double rho,
- Double rmsDecay, Double epsilon) {
- BaseLayer bLayer;
- if (layer instanceof FrozenLayer && ((FrozenLayer) layer).getLayer() instanceof BaseLayer) {
- bLayer = (BaseLayer) ((FrozenLayer) layer).getLayer();
- } else if (layer instanceof BaseLayer) {
- bLayer = (BaseLayer) layer;
- } else {
- return;
+ public static void assertNInNOutSet(String layerType, String layerName, int layerIndex, int nIn, int nOut) {
+ if (nIn <= 0 || nOut <= 0) {
+ if (layerName == null)
+ layerName = "(name not set)";
+ throw new DL4JInvalidConfigException(layerType + " (index=" + layerIndex + ", name=" + layerName + ") nIn="
+ + nIn + ", nOut=" + nOut + "; nIn and nOut must be > 0");
}
- updaterValidation(layerName, bLayer, learningRate == null ? Double.NaN : learningRate,
- momentum == null ? Double.NaN : momentum, momentumSchedule,
- adamMeanDecay == null ? Double.NaN : adamMeanDecay,
- adamVarDecay == null ? Double.NaN : adamVarDecay, rho == null ? Double.NaN : rho,
- rmsDecay == null ? Double.NaN : rmsDecay, epsilon == null ? Double.NaN : epsilon);
}
- /**
- * Validate the updater configuration - setting the default updater values, if necessary
- */
- public static void updaterValidation(String layerName, BaseLayer layer, double learningRate, double momentum,
- Map<Integer, Double> momentumSchedule, double adamMeanDecay, double adamVarDecay, double rho,
- double rmsDecay, double epsilon) {
- if ((!Double.isNaN(momentum) || !Double.isNaN(layer.getMomentum())) && layer.getUpdater() != Updater.NESTEROVS)
- OneTimeLogger.warn(log, "Layer \"" + layerName
- + "\" momentum has been set but will not be applied unless the updater is set to NESTEROVS.");
- if ((momentumSchedule != null || layer.getMomentumSchedule() != null)
- && layer.getUpdater() != Updater.NESTEROVS)
- OneTimeLogger.warn(log, "Layer \"" + layerName
- + "\" momentum schedule has been set but will not be applied unless the updater is set to NESTEROVS.");
- if ((!Double.isNaN(adamVarDecay) || (!Double.isNaN(layer.getAdamVarDecay())))
- && layer.getUpdater() != Updater.ADAM)
- OneTimeLogger.warn(log, "Layer \"" + layerName
- + "\" adamVarDecay is set but will not be applied unless the updater is set to Adam.");
- if ((!Double.isNaN(adamMeanDecay) || !Double.isNaN(layer.getAdamMeanDecay()))
- && layer.getUpdater() != Updater.ADAM)
- OneTimeLogger.warn(log, "Layer \"" + layerName
- + "\" adamMeanDecay is set but will not be applied unless the updater is set to Adam.");
- if ((!Double.isNaN(rho) || !Double.isNaN(layer.getRho())) && layer.getUpdater() != Updater.ADADELTA)
- OneTimeLogger.warn(log, "Layer \"" + layerName
- + "\" rho is set but will not be applied unless the updater is set to ADADELTA.");
- if ((!Double.isNaN(rmsDecay) || (!Double.isNaN(layer.getRmsDecay()))) && layer.getUpdater() != Updater.RMSPROP)
- OneTimeLogger.warn(log, "Layer \"" + layerName
- + "\" rmsdecay is set but will not be applied unless the updater is set to RMSPROP.");
-
- //Set values from old (deprecated) .epsilon(), .momentum(), etc methods to the built-in updaters
- //Note that there are *layer* versions (available via the layer) and *global* versions (via the method args)
- //The layer versions take precedence over the global versions. If neither are set, we use whatever is set
- // on the IUpdater instance, which may be the default, or may be user-configured
- //Note that default values for all other parameters are set by default in the Sgd/Adam/whatever classes
- //Hence we don't need to set them here
- //Finally: we'll also set the (updater enumeration field to something sane) to avoid updater=SGD,
- // iupdater=Adam() type situations. Though the updater field isn't used, we don't want to confuse users
- IUpdater u = layer.getIUpdater();
- if (!Double.isNaN(layer.getLearningRate())) {
- //Note that for LRs, if user specifies .learningRate(x).updater(Updater.SGD) (for example), we need to set the
- // LR in the Sgd object. We can do this using the schedules method, which also works for custom updaters
- //Local layer LR set
- u.applySchedules(0, layer.getLearningRate());
- } else if (!Double.isNaN(learningRate)) {
- //Global LR set
- u.applySchedules(0, learningRate);
- }
-
-
- if (u instanceof Sgd) {
- layer.setUpdater(Updater.SGD);
-
- } else if (u instanceof Adam) {
- Adam a = (Adam) u;
- if (!Double.isNaN(layer.getEpsilon())) {
- //user has done legacy .epsilon(...) on the layer itself
- a.setEpsilon(layer.getEpsilon());
- } else if (!Double.isNaN(epsilon)) {
- //user has done legacy .epsilon(...) on MultiLayerNetwork or ComputationGraph
- a.setEpsilon(epsilon);
- }
-
- if (!Double.isNaN(layer.getAdamMeanDecay())) {
- a.setBeta1(layer.getAdamMeanDecay());
- } else if (!Double.isNaN(adamMeanDecay)) {
- a.setBeta1(adamMeanDecay);
- }
-
- if (!Double.isNaN(layer.getAdamVarDecay())) {
- a.setBeta2(layer.getAdamVarDecay());
- } else if (!Double.isNaN(adamVarDecay)) {
- a.setBeta2(adamVarDecay);
- }
-
- layer.setUpdater(Updater.ADAM);
-
- } else if (u instanceof AdaDelta) {
- AdaDelta a = (AdaDelta) u;
-
- if (!Double.isNaN(layer.getRho())) {
- a.setRho(layer.getRho());
- } else if (!Double.isNaN(rho)) {
- a.setRho(rho);
- }
-
- if (!Double.isNaN(layer.getEpsilon())) {
- a.setEpsilon(layer.getEpsilon());
- } else if (!Double.isNaN(epsilon)) {
- a.setEpsilon(epsilon);
- }
-
- layer.setUpdater(Updater.ADADELTA);
-
- } else if (u instanceof Nesterovs) {
- Nesterovs n = (Nesterovs) u;
- if (!Double.isNaN(layer.getMomentum())) {
- n.setMomentum(layer.getMomentum());
- } else if (!Double.isNaN(momentum)) {
- n.setMomentum(momentum);
- }
-
- if (layer.getMomentumSchedule() != null && !layer.getMomentumSchedule().isEmpty()) {
- n.setMomentumSchedule(layer.getMomentumSchedule());
- } else if (momentumSchedule != null && !momentumSchedule.isEmpty()) {
- n.setMomentumSchedule(momentumSchedule);
- }
- layer.setUpdater(Updater.NESTEROVS);
-
- } else if (u instanceof AdaGrad) {
- AdaGrad a = (AdaGrad) u;
- if (!Double.isNaN(layer.getEpsilon())) {
- a.setEpsilon(layer.getEpsilon());
- } else if (!Double.isNaN(epsilon)) {
- a.setEpsilon(epsilon);
- }
-
- layer.setUpdater(Updater.ADAGRAD);
-
- } else if (u instanceof RmsProp) {
- RmsProp r = (RmsProp) u;
-
- if (!Double.isNaN(layer.getEpsilon())) {
- r.setEpsilon(layer.getEpsilon());
- } else if (!Double.isNaN(epsilon)) {
- r.setEpsilon(epsilon);
- }
- if (!Double.isNaN(layer.getRmsDecay())) {
- r.setRmsDecay(layer.getRmsDecay());
- } else if (!Double.isNaN(rmsDecay)) {
- r.setRmsDecay(rmsDecay);
- }
- layer.setUpdater(Updater.RMSPROP);
-
- } else if (u instanceof AdaMax) {
- AdaMax a = (AdaMax) u;
-
- if (!Double.isNaN(layer.getEpsilon())) {
- a.setEpsilon(layer.getEpsilon());
- } else if (!Double.isNaN(epsilon)) {
- a.setEpsilon(epsilon);
- }
-
- if (!Double.isNaN(layer.getAdamMeanDecay())) {
- a.setBeta1(layer.getAdamMeanDecay());
- } else if (!Double.isNaN(adamMeanDecay)) {
- a.setBeta1(adamMeanDecay);
- }
-
- if (!Double.isNaN(layer.getAdamVarDecay())) {
- a.setBeta2(layer.getAdamVarDecay());
- } else if (!Double.isNaN(adamVarDecay)) {
- a.setBeta2(adamVarDecay);
- }
- layer.setUpdater(Updater.ADAMAX);
-
- } else if (u instanceof NoOp) {
- layer.setUpdater(Updater.NONE);
- } else {
- //Probably a custom updater
- layer.setUpdater(null);
- }
-
-
- //Finally: Let's set the legacy momentum, epsilon, rmsDecay fields on the layer
- //At this point, it's purely cosmetic, to avoid NaNs etc there that might confuse users
- //The *true* values are now in the IUpdater instances
- if (layer.getUpdater() != null) { //May be null with custom updaters etc
- switch (layer.getUpdater()) {
- case NESTEROVS:
- if (Double.isNaN(momentum) && Double.isNaN(layer.getMomentum())) {
- layer.setMomentum(Nesterovs.DEFAULT_NESTEROV_MOMENTUM);
- } else if (Double.isNaN(layer.getMomentum()))
- layer.setMomentum(momentum);
- if (momentumSchedule != null && layer.getMomentumSchedule() == null)
- layer.setMomentumSchedule(momentumSchedule);
- else if (momentumSchedule == null && layer.getMomentumSchedule() == null)
- layer.setMomentumSchedule(new HashMap<Integer, Double>());
- break;
- case ADAM:
- if (Double.isNaN(adamMeanDecay) && Double.isNaN(layer.getAdamMeanDecay())) {
- layer.setAdamMeanDecay(Adam.DEFAULT_ADAM_BETA1_MEAN_DECAY);
- } else if (Double.isNaN(layer.getAdamMeanDecay()))
- layer.setAdamMeanDecay(adamMeanDecay);
-
- if (Double.isNaN(adamVarDecay) && Double.isNaN(layer.getAdamVarDecay())) {
- layer.setAdamVarDecay(Adam.DEFAULT_ADAM_BETA2_VAR_DECAY);
- } else if (Double.isNaN(layer.getAdamVarDecay()))
- layer.setAdamVarDecay(adamVarDecay);
-
- if (Double.isNaN(epsilon) && Double.isNaN(layer.getEpsilon())) {
- layer.setEpsilon(Adam.DEFAULT_ADAM_EPSILON);
- } else if (Double.isNaN(layer.getEpsilon())) {
- layer.setEpsilon(epsilon);
- }
- break;
- case ADADELTA:
- if (Double.isNaN(rho) && Double.isNaN(layer.getRho())) {
- layer.setRho(AdaDelta.DEFAULT_ADADELTA_RHO);
- } else if (Double.isNaN(layer.getRho())) {
- layer.setRho(rho);
- }
-
- if (Double.isNaN(epsilon) && Double.isNaN(layer.getEpsilon())) {
- layer.setEpsilon(AdaDelta.DEFAULT_ADADELTA_EPSILON);
- } else if (Double.isNaN(layer.getEpsilon())) {
- layer.setEpsilon(epsilon);
- }
- break;
- case ADAGRAD:
- if (Double.isNaN(epsilon) && Double.isNaN(layer.getEpsilon())) {
- layer.setEpsilon(AdaGrad.DEFAULT_ADAGRAD_EPSILON);
- } else if (Double.isNaN(layer.getEpsilon())) {
- layer.setEpsilon(epsilon);
- }
- break;
- case RMSPROP:
- if (Double.isNaN(rmsDecay) && Double.isNaN(layer.getRmsDecay())) {
- layer.setRmsDecay(RmsProp.DEFAULT_RMSPROP_RMSDECAY);
- } else if (Double.isNaN(layer.getRmsDecay()))
- layer.setRmsDecay(rmsDecay);
-
- if (Double.isNaN(epsilon) && Double.isNaN(layer.getEpsilon())) {
- layer.setEpsilon(RmsProp.DEFAULT_RMSPROP_EPSILON);
- } else if (Double.isNaN(layer.getEpsilon())) {
- layer.setEpsilon(epsilon);
- }
- break;
- case ADAMAX:
- if (Double.isNaN(adamMeanDecay) && Double.isNaN(layer.getAdamMeanDecay())) {
- layer.setAdamMeanDecay(AdaMax.DEFAULT_ADAMAX_BETA1_MEAN_DECAY);
- } else if (Double.isNaN(layer.getAdamMeanDecay()))
- layer.setAdamMeanDecay(adamMeanDecay);
-
- if (Double.isNaN(adamVarDecay) && Double.isNaN(layer.getAdamVarDecay())) {
- layer.setAdamVarDecay(AdaMax.DEFAULT_ADAMAX_BETA2_VAR_DECAY);
- } else if (Double.isNaN(layer.getAdamVarDecay()))
- layer.setAdamVarDecay(adamVarDecay);
-
- if (Double.isNaN(epsilon) && Double.isNaN(layer.getEpsilon())) {
- layer.setEpsilon(AdaMax.DEFAULT_ADAMAX_EPSILON);
- } else if (Double.isNaN(layer.getEpsilon())) {
- layer.setEpsilon(epsilon);
- }
- }
- }
- }
-
- public static void generalValidation(String layerName, Layer layer, boolean useDropConnect, Double dropOut,
+ public static void generalValidation(String layerName, Layer layer, IDropout iDropOut,
Double l2, Double l2Bias, Double l1, Double l1Bias,
Distribution dist, List<LayerConstraint> allParamConstraints,
List<LayerConstraint> weightConstraints, List<LayerConstraint> biasConstraints) {
- generalValidation(layerName, layer, useDropConnect, dropOut == null ? 0.0 : dropOut,
+ generalValidation(layerName, layer, iDropOut,
l2 == null ? Double.NaN : l2, l2Bias == null ? Double.NaN : l2Bias,
l1 == null ? Double.NaN : l1, l1Bias == null ? Double.NaN : l1Bias, dist, allParamConstraints, weightConstraints, biasConstraints);
}
- public static void generalValidation(String layerName, Layer layer, boolean useDropConnect, double dropOut,
+ public static void generalValidation(String layerName, Layer layer, IDropout iDropout,
double l2, double l2Bias, double l1, double l1Bias,
Distribution dist, List<LayerConstraint> allParamConstraints,
List<LayerConstraint> weightConstraints, List<LayerConstraint> biasConstraints) {
if (layer != null) {
-
- if (useDropConnect && (Double.isNaN(dropOut) && (Double.isNaN(layer.getDropOut()))))
- OneTimeLogger.warn(log, "Layer \"" + layerName
- + "\" dropConnect is set to true but dropout rate has not been added to configuration.");
- if (useDropConnect && layer.getDropOut() == 0.0)
- OneTimeLogger.warn(log,
- "Layer \"" + layerName + " dropConnect is set to true but dropout rate is set to 0.0");
-
if (layer instanceof BaseLayer) {
BaseLayer bLayer = (BaseLayer) layer;
- configureBaseLayer(layerName, bLayer, useDropConnect, dropOut, l2, l2Bias, l1,
- l1Bias, dist);
+ configureBaseLayer(layerName, bLayer, iDropout, l2, l2Bias, l1, l1Bias, dist);
} else if (layer instanceof FrozenLayer && ((FrozenLayer) layer).getLayer() instanceof BaseLayer) {
BaseLayer bLayer = (BaseLayer) ((FrozenLayer) layer).getLayer();
- configureBaseLayer(layerName, bLayer, useDropConnect, dropOut, l2, l2Bias, l1,
- l1Bias, dist);
+ configureBaseLayer(layerName, bLayer, iDropout, l2, l2Bias, l1, l1Bias, dist);
}
if(layer.getConstraints() == null || layer.constraints.isEmpty()) {
@@ -347,13 +90,17 @@ public static void generalValidation(String layerName, Layer layer, boolean useD
}
}
- layer.setConstraints(allConstraints);
+ if(allConstraints.size() > 0){
+ layer.setConstraints(allConstraints);
+ } else {
+ layer.setConstraints(null);
+ }
}
}
}
- private static void configureBaseLayer(String layerName, BaseLayer bLayer, boolean useDropConnect,
- Double dropOut, Double l2, Double l2Bias, Double l1, Double l1Bias,
+ private static void configureBaseLayer(String layerName, BaseLayer bLayer, IDropout iDropout, Double l2, Double l2Bias,
+ Double l1, Double l1Bias,
Distribution dist) {
if (!Double.isNaN(l1) && Double.isNaN(bLayer.getL1())) {
@@ -382,6 +129,10 @@ private static void configureBaseLayer(String layerName, BaseLayer bLayer, bool
bLayer.setL1Bias(0.0);
}
+ if(bLayer.getIDropout() == null){
+ bLayer.setIDropout(iDropout);
+ }
+
if (bLayer.getWeightInit() == WeightInit.DISTRIBUTION) {
if (dist != null && bLayer.getDist() == null)
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java
index c340f4b6e83e..190655b9bfbb 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java
@@ -101,12 +101,6 @@ public double getL2ByParam(String paramName) {
return 0;
}
- @Override
- public double getLearningRateByParam(String paramName) {
- //Not applicable
- return 0;
- }
-
@Override
public boolean isPretrainParam(String paramName) {
return false; //No params in LRN
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java
index 4e118d418711..13707aad608d 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java
@@ -27,7 +27,6 @@
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.deeplearning4j.optimize.api.IterationListener;
-import org.deeplearning4j.util.LayerValidation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.lossfunctions.ILossFunction;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RBM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RBM.java
index 3eebc2f07669..207a826054cc 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RBM.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RBM.java
@@ -104,7 +104,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) {
int updaterStateSize = (int) getIUpdater().stateSize(numParams);
int trainSizePerEx = 0;
- if (getDropOut() > 0) {
+ if (getIDropout() != null) {
if (false) {
//TODO drop connect
//Dup the weights... note that this does NOT depend on the minibatch size...
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java
index 9db77892efd9..c8464c5839a9 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java
@@ -11,7 +11,6 @@
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.deeplearning4j.optimize.api.IterationListener;
-import org.deeplearning4j.util.LayerValidation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.lossfunctions.ILossFunction;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java
index 606666d52efb..d00ee2611743 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java
@@ -4,11 +4,9 @@
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.ToString;
-import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
-import org.deeplearning4j.nn.params.EmptyParamInitializer;
import org.deeplearning4j.optimize.api.IterationListener;
import org.deeplearning4j.util.ConvolutionUtils;
import org.nd4j.linalg.api.ndarray.INDArray;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java
index 1e71e60e3354..ff988cbc8ff6 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java
@@ -151,12 +151,6 @@ public double getL2ByParam(String paramName) {
return 0;
}
- @Override
- public double getLearningRateByParam(String paramName) {
- //Not applicable
- return 0;
- }
-
@Override
public boolean isPretrainParam(String paramName) {
throw new UnsupportedOperationException("SubsamplingLayer does not contain parameters");
@@ -176,7 +170,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) {
//Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass
int trainingWorkingSizePerEx = im2colSizePerEx;
- if (getDropOut() > 0) {
+ if (getIDropout() != null) {
//Dup on the input before dropout, but only for training
trainingWorkingSizePerEx += inputType.arrayElementsPerExample();
}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java
new file mode 100644
index 000000000000..58e884ab7d65
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java
@@ -0,0 +1,140 @@
+/*-
+ *
+ * * Copyright 2017 Skymind,Inc.
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+package org.deeplearning4j.nn.conf.layers;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
+import lombok.ToString;
+import org.deeplearning4j.nn.api.ParamInitializer;
+import org.deeplearning4j.nn.conf.InputPreProcessor;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
+import org.deeplearning4j.nn.conf.memory.MemoryReport;
+import org.deeplearning4j.nn.params.EmptyParamInitializer;
+import org.deeplearning4j.optimize.api.IterationListener;
+import org.nd4j.linalg.api.ndarray.INDArray;
+
+import java.util.Collection;
+import java.util.Map;
+
+/**
+ * Upsampling 1D layer
+ *
+ * @author Max Pumperla
+ */
+
+@Data
+@NoArgsConstructor
+@ToString(callSuper = true)
+@EqualsAndHashCode(callSuper = true)
+public class Upsampling1D extends BaseUpsamplingLayer {
+
+ protected int size;
+
+ protected Upsampling1D(UpsamplingBuilder builder) {
+ super(builder);
+ this.size = builder.size;
+ }
+
+ @Override
+ public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
Collection<IterationListener> iterationListeners, int layerIndex, INDArray layerParamsView,
+ boolean initializeParams) {
+ org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling1D ret =
+ new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling1D(conf);
+ ret.setListeners(iterationListeners);
+ ret.setIndex(layerIndex);
+ ret.setParamsViewArray(layerParamsView);
+ Map<String, INDArray> paramTable = initializer().init(conf, layerParamsView, initializeParams);
+ ret.setParamTable(paramTable);
+ ret.setConf(conf);
+ return ret;
+ }
+
+ @Override
+ public Upsampling1D clone() {
+ Upsampling1D clone = (Upsampling1D) super.clone();
+ return clone;
+ }
+
+ @Override
+ public InputType getOutputType(int layerIndex, InputType inputType) {
+ if (inputType == null || inputType.getType() != InputType.Type.RNN) {
+ throw new IllegalStateException("Invalid input for 1D Upsampling layer (layer index = " + layerIndex
+ + ", layer name = \"" + getLayerName() + "\"): expect RNN input type with size > 0. Got: "
+ + inputType);
+ }
+ InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType;
+ return InputType.recurrent(recurrent.getSize(), recurrent.getTimeSeriesLength());
+ }
+
+ @Override
+ public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
+ if (inputType == null) {
+ throw new IllegalStateException("Invalid input for Upsampling layer (layer name=\"" + getLayerName()
+ + "\"): input is null");
+ }
+ return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName());
+ }
+
+ @Override
+ public LayerMemoryReport getMemoryReport(InputType inputType) {
+ InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType;
+ InputType.InputTypeRecurrent outputType = (InputType.InputTypeRecurrent) getOutputType(-1, inputType);
+
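+ //Working memory estimate for the upsampled activations during training; this layer has no parameters and does no caching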
+ int im2colSizePerEx = recurrent.getSize() * outputType.getTimeSeriesLength() * size;
+ int trainingWorkingSizePerEx = im2colSizePerEx;
+ if (getIDropout() != null) {
+ trainingWorkingSizePerEx += inputType.arrayElementsPerExample();
+ }
+
+ return new LayerMemoryReport.Builder(layerName, Upsampling1D.class, inputType, outputType)
+ .standardMemory(0, 0) //No params
+ .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx)
+ .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
+ .build();
+ }
+
+ @NoArgsConstructor
+ public static class Builder extends UpsamplingBuilder<Builder> {
+
+ public Builder(int size) {
+ super(size);
+ }
+
+ /**
+ * Upsampling size
+ *
+ * @param size upsampling size in the sequence (time) dimension
+ */
+ public Builder size(int size) {
+ this.size = size;
+ return this;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public Upsampling1D build() {
+ return new Upsampling1D(this);
+ }
+ }
+
+}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java
index 6e355c28b72a..b7d5bf322b21 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java
@@ -21,13 +21,11 @@
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.ToString;
-import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.conf.memory.MemoryReport;
-import org.deeplearning4j.nn.params.EmptyParamInitializer;
import org.deeplearning4j.optimize.api.IterationListener;
import org.nd4j.linalg.api.ndarray.INDArray;
@@ -35,7 +33,7 @@
import java.util.Map;
/**
- * Upsampling layer
+ * Upsampling 2D layer
*
* @author Max Pumperla
*/
@@ -44,11 +42,11 @@
@NoArgsConstructor
@ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true)
-public class Upsampling2D extends Layer {
+public class Upsampling2D extends BaseUpsamplingLayer {
protected int size;
- protected Upsampling2D(Upsampling2DBuilder builder) {
+ protected Upsampling2D(UpsamplingBuilder builder) {
super(builder);
this.size = builder.size;
}
@@ -74,11 +72,6 @@ public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
return ret;
}
- @Override
- public ParamInitializer initializer() {
- return EmptyParamInitializer.getInstance();
- }
-
@Override
public InputType getOutputType(int layerIndex, InputType inputType) {
if (inputType == null || inputType.getType() != InputType.Type.CNN) {
@@ -93,11 +86,6 @@ public InputType getOutputType(int layerIndex, InputType inputType) {
return InputType.convolutional(size * inHeight, size * inWidth, inDepth);
}
- @Override
- public void setNIn(InputType inputType, boolean override) {
- //No op: upsampling layer doesn't have nIn value
- }
-
@Override
public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
if (inputType == null) {
@@ -107,29 +95,6 @@ public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName());
}
- @Override
- public double getL1ByParam(String paramName) {
- //Not applicable
- return 0;
- }
-
- @Override
- public double getL2ByParam(String paramName) {
- //Not applicable
- return 0;
- }
-
- @Override
- public double getLearningRateByParam(String paramName) {
- //Not applicable
- return 0;
- }
-
- @Override
- public boolean isPretrainParam(String paramName) {
- throw new UnsupportedOperationException("UpsamplingLayer does not contain parameters");
- }
-
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
@@ -140,7 +105,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) {
// Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass
int trainingWorkingSizePerEx = im2colSizePerEx;
- if (getDropOut() > 0) {
+ if (getIDropout() != null) {
//Dup on the input before dropout, but only for training
trainingWorkingSizePerEx += inputType.arrayElementsPerExample();
}
@@ -154,7 +119,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) {
@NoArgsConstructor
- public static class Builder extends Upsampling2DBuilder<Builder> {
+ public static class Builder extends UpsamplingBuilder<Builder> {
public Builder(int size) {
super(size);
@@ -171,7 +136,6 @@ public Builder size(int size) {
return this;
}
-
@Override
@SuppressWarnings("unchecked")
public Upsampling2D build() {
@@ -179,14 +143,4 @@ public Upsampling2D build() {
}
}
- @NoArgsConstructor
- protected static abstract class Upsampling2DBuilder<T extends Upsampling2DBuilder<T>>
- extends Layer.Builder<T> {
- protected int size = 1;
-
- protected Upsampling2DBuilder(int size) {
- this.size = size;
- }
- }
-
}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java
index eddfec3b6335..a94f6f700838 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java
@@ -108,11 +108,6 @@ public double getL2ByParam(String paramName) {
return 0;
}
- @Override
- public double getLearningRateByParam(String paramName) {
- return 0;
- }
-
@Override
public boolean isPretrainParam(String paramName) {
throw new UnsupportedOperationException("ZeroPaddingLayer does not contain parameters");
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java
index a0ed0bf6f355..47b514c84ea8 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java
@@ -106,11 +106,6 @@ public double getL2ByParam(String paramName) {
return 0;
}
- @Override
- public double getLearningRateByParam(String paramName) {
- return 0;
- }
-
@Override
public boolean isPretrainParam(String paramName) {
throw new UnsupportedOperationException("ZeroPaddingLayer does not contain parameters");
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java
index 56da2b728a53..66abec1e812a 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java
@@ -3,13 +3,12 @@
import lombok.EqualsAndHashCode;
import lombok.Getter;
import org.deeplearning4j.nn.api.ParamInitializer;
+import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.Layer;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
-import org.deeplearning4j.nn.params.EmptyParamInitializer;
import org.deeplearning4j.nn.params.FrozenLayerParamInitializer;
import org.deeplearning4j.optimize.api.IterationListener;
import org.nd4j.linalg.api.ndarray.INDArray;
@@ -68,12 +67,10 @@ public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
conf.variables(false).add(s);
conf.getL1ByParam().put(s, 0.0);
conf.getL2ByParam().put(s, 0.0);
- conf.getLearningRateByParam().put(s, 0.0);
nncUnderlying.variables(false).add(s);
nncUnderlying.getL1ByParam().put(s, 0.0);
nncUnderlying.getL2ByParam().put(s, 0.0);
- nncUnderlying.getLearningRateByParam().put(s, 0.0);
}
}
@@ -110,23 +107,13 @@ public double getL2ByParam(String paramName) {
return 0;
}
- @Override
- public double getLearningRateByParam(String paramName) {
- return 0;
- }
-
@Override
public boolean isPretrainParam(String paramName) {
return false;
}
@Override
- public Updater getUpdaterByParam(String paramName) {
- return null;
- }
-
- @Override
- public IUpdater getIUpdaterByParam(String paramName) {
+ public IUpdater getUpdaterByParam(String paramName) {
return null;
}
@@ -141,6 +128,12 @@ public void setLayerName(String layerName) {
layer.setLayerName(layerName);
}
+ @Override
+ public void setConstraints(List<LayerConstraint> constraints){
+ this.constraints = constraints;
+ this.layer.setConstraints(constraints);
+ }
+
public static class Builder extends Layer.Builder<Builder> {
private Layer layer;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java
index 6dab4bb71b0c..c48fbd83462a 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java
@@ -1,7 +1,6 @@
package org.deeplearning4j.nn.conf.layers.objdetect;
import lombok.Data;
-import lombok.Getter;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.InputPreProcessor;
@@ -18,7 +17,6 @@
import org.nd4j.shade.jackson.databind.annotation.JsonSerialize;
import org.nd4j.shade.serde.jackson.VectorDeSerializer;
import org.nd4j.shade.serde.jackson.VectorSerializer;
-import org.nd4j.shade.serde.jackson.shaded.NDArraySerializer;
import java.util.Arrays;
import java.util.Collection;
@@ -114,11 +112,6 @@ public double getL2ByParam(String paramName) {
return 0; //No params
}
- @Override
- public double getLearningRateByParam(String paramName) {
- return 0; //No params
- }
-
@Override
public boolean isPretrainParam(String paramName) {
return false; //No params
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/GaussianReconstructionDistribution.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/GaussianReconstructionDistribution.java
index 581a0e308975..b993a39600cf 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/GaussianReconstructionDistribution.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/GaussianReconstructionDistribution.java
@@ -33,15 +33,7 @@ public class GaussianReconstructionDistribution implements ReconstructionDistrib
* Create a GaussianReconstructionDistribution with the default identity activation function.
*/
public GaussianReconstructionDistribution() {
- this("identity");
- }
-
- /**
- * @deprecated Use {@link #GaussianReconstructionDistribution(Activation)}
- */
- @Deprecated
- public GaussianReconstructionDistribution(String activationFn) {
- this(Activation.fromString(activationFn).getActivationFunction());
+ this(Activation.IDENTITY);
}
/**
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java
index 7344ab531d67..05cc5b5f3036 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java
@@ -8,11 +8,11 @@
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.BasePretrainNetwork;
+import org.deeplearning4j.nn.conf.layers.LayerValidation;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.conf.memory.MemoryReport;
import org.deeplearning4j.nn.params.VariationalAutoencoderParamInitializer;
import org.deeplearning4j.optimize.api.IterationListener;
-import org.deeplearning4j.util.LayerValidation;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.activations.impl.ActivationIdentity;
@@ -80,20 +80,6 @@ public ParamInitializer initializer() {
return VariationalAutoencoderParamInitializer.getInstance();
}
- @Override
- public double getLearningRateByParam(String paramName) {
- if (paramName.endsWith("b")) {
- if (!Double.isNaN(biasLearningRate)) {
- //Bias learning rate has been explicitly set
- return biasLearningRate;
- } else {
- return learningRate;
- }
- } else {
- return learningRate;
- }
- }
-
@Override
public double getL1ByParam(String paramName) {
if (paramName.endsWith(VariationalAutoencoderParamInitializer.BIAS_KEY_SUFFIX))
@@ -153,7 +139,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) {
int trainWorkingMemSize = 2 * (inferenceWorkingMemSizePerEx + decoderFwdSizeWorking);
- if (getDropOut() > 0) {
+ if (getIDropout() != null) {
if (false) {
//TODO drop connect
//Dup the weights... note that this does NOT depend on the minibatch size...
@@ -273,15 +259,6 @@ public Builder pzxActivationFn(IActivation activationFunction) {
return this;
}
-
- /**
- * @deprecated Use {@link #pzxActivationFunction(Activation)}
- */
- @Deprecated
- public Builder pzxActivationFunction(String activationFunction) {
- return pzxActivationFn(Activation.fromString(activationFunction).getActivationFunction());
- }
-
/**
* Activation function for the input to P(z|data).
* Care should be taken with this, as some activation functions (relu, etc) are not suitable due to being
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/BaseInputPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/BaseInputPreProcessor.java
index 405e6a12810f..b319e6a3cd16 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/BaseInputPreProcessor.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/BaseInputPreProcessor.java
@@ -1,9 +1,9 @@
package org.deeplearning4j.nn.conf.preprocessor;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/**
* @author Adam Gibson
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java
index 8d04982e6b1e..ba672ceab82f 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java
@@ -19,12 +19,12 @@
package org.deeplearning4j.nn.conf.preprocessor;
import lombok.Data;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.shape.Shape;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.shade.jackson.annotation.JsonCreator;
import org.nd4j.shade.jackson.annotation.JsonProperty;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java
index 062529c4a460..3b4e96cfbac5 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java
@@ -1,15 +1,12 @@
package org.deeplearning4j.nn.conf.preprocessor;
-import lombok.AccessLevel;
-import lombok.Data;
-import lombok.Getter;
-import lombok.Setter;
-import org.nd4j.linalg.primitives.Pair;
+import lombok.*;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.util.TimeSeriesUtils;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.util.ArrayUtil;
import org.nd4j.shade.jackson.annotation.JsonCreator;
import org.nd4j.shade.jackson.annotation.JsonProperty;
@@ -30,6 +27,7 @@
* @author Alex Black
*/
@Data
+@EqualsAndHashCode(exclude = {"product"})
public class CnnToRnnPreProcessor implements InputPreProcessor {
private int inputHeight;
private int inputWidth;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/ComposableInputPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/ComposableInputPreProcessor.java
index 920e145f96c0..ac1d02a3a95f 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/ComposableInputPreProcessor.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/ComposableInputPreProcessor.java
@@ -20,11 +20,11 @@
import lombok.Data;
import lombok.EqualsAndHashCode;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.shade.jackson.annotation.JsonCreator;
import org.nd4j.shade.jackson.annotation.JsonProperty;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnnPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnnPreProcessor.java
index 3eba7bac42db..72bb972d1843 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnnPreProcessor.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnnPreProcessor.java
@@ -18,16 +18,13 @@
package org.deeplearning4j.nn.conf.preprocessor;
-import lombok.AccessLevel;
-import lombok.Data;
-import lombok.Getter;
-import lombok.Setter;
-import org.nd4j.linalg.primitives.Pair;
+import lombok.*;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.shape.Shape;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.util.ArrayUtil;
import org.nd4j.shade.jackson.annotation.JsonCreator;
import org.nd4j.shade.jackson.annotation.JsonProperty;
@@ -50,6 +47,7 @@
* @see CnnToFeedForwardPreProcessor for opposite case (i.e., CNN -> DenseLayer etc)
*/
@Data
+@EqualsAndHashCode(exclude = {"shape"})
public class FeedForwardToCnnPreProcessor implements InputPreProcessor {
private int inputHeight;
private int inputWidth;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToRnnPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToRnnPreProcessor.java
index b98454aeddf0..5facd986ab70 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToRnnPreProcessor.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToRnnPreProcessor.java
@@ -2,13 +2,13 @@
import lombok.Data;
import lombok.NoArgsConstructor;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.util.TimeSeriesUtils;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.shape.Shape;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToCnnPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToCnnPreProcessor.java
index cf3b011414e6..c3ea5b17d0be 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToCnnPreProcessor.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToCnnPreProcessor.java
@@ -1,15 +1,12 @@
package org.deeplearning4j.nn.conf.preprocessor;
-import lombok.AccessLevel;
-import lombok.Data;
-import lombok.Getter;
-import lombok.Setter;
-import org.nd4j.linalg.primitives.Pair;
+import lombok.*;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.util.TimeSeriesUtils;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.util.ArrayUtil;
import org.nd4j.shade.jackson.annotation.JsonProperty;
@@ -30,6 +27,7 @@
* @author Alex Black
*/
@Data
+@EqualsAndHashCode(exclude = {"product"})
public class RnnToCnnPreProcessor implements InputPreProcessor {
private int inputHeight;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToFeedForwardPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToFeedForwardPreProcessor.java
index 10bc837dd8cd..8ee0ea7120a4 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToFeedForwardPreProcessor.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToFeedForwardPreProcessor.java
@@ -2,13 +2,13 @@
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.util.TimeSeriesUtils;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.shape.Shape;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java
index dfad27d8a709..c3e3672fd50a 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java
@@ -11,9 +11,9 @@
import org.nd4j.shade.jackson.databind.JsonMappingException;
import org.nd4j.shade.jackson.databind.deser.ResolvableDeserializer;
import org.nd4j.shade.jackson.databind.deser.std.StdDeserializer;
+import org.nd4j.shade.jackson.databind.node.ObjectNode;
import java.io.IOException;
-import java.util.Map;
/**
* A custom (abstract) deserializer that handles backward compatibility (currently only for updater refactoring that
@@ -38,56 +38,104 @@ public BaseNetConfigDeserializer(JsonDeserializer<?> defaultDeserializer, Class<
public abstract T deserialize(JsonParser jp, DeserializationContext ctxt)
throws IOException, JsonProcessingException;
-
- protected void handleUpdaterBackwardCompatibility(Layer[] layers) {
- //Updater configuration changed after 0.8.0 release
- //Previously: enumerations and a bunch of fields. Now: classes
- //Here, we manually create the appropriate Updater instances, if the iupdater field is empty
- for (int i = 0; i < layers.length; i++) {
- Layer l = layers[i];
- if (l == null || !(l instanceof BaseLayer) || ((BaseLayer) l).getIUpdater() != null) {
- //OK - no need to manually handle IUpdater instances for this layer
- continue;
+ protected boolean requiresIUpdaterFromLegacy(Layer[] layers){
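+ //An IUpdater only needs to be restored from the legacy format for layers that actually have parameters to update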
+ for(Layer l : layers){
+ if(l instanceof BaseLayer){
+ BaseLayer bl = (BaseLayer)l;
+ if(bl.getIUpdater() == null && bl.initializer().numParams(bl) > 0){
+ return true;
+ }
}
+ }
+ return false;
+ }
- BaseLayer bl = (BaseLayer) l;
+ protected boolean requiresDropoutFromLegacy(Layer[] layers){
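+ //Legacy dropout handling is only required if no layer already has an IDropout instance set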
+ for(Layer l : layers){
+ if(l.getIDropout() != null){
+ return false;
+ }
+ }
+ return true;
+ }
- Updater u = bl.getUpdater();
- double lr = bl.getLearningRate();
- double eps = bl.getEpsilon();
- double rho = bl.getRho();
+ protected void handleUpdaterBackwardCompatibility(BaseLayer layer, ObjectNode on){
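+ //Maps the legacy updater enum plus flat config fields (learningRate, momentum, rho, epsilon, etc) onto an equivalent IUpdater instance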
+ if(on != null && on.has("updater")){
+ String updaterName = on.get("updater").asText();
+ if(updaterName != null){
+ Updater u = Updater.valueOf(updaterName);
+ IUpdater iu = u.getIUpdaterWithDefaultConfig();
+ double lr = on.get("learningRate").asDouble();
+ double eps;
+ if(on.has("epsilon")){
+ eps = on.get("epsilon").asDouble();
+ } else {
+ eps = Double.NaN;
+ }
+ double rho = on.get("rho").asDouble();
+ switch (u){
+ case SGD:
+ ((Sgd)iu).setLearningRate(lr);
+ break;
+ case ADAM:
+ if(Double.isNaN(eps)){
+ eps = Adam.DEFAULT_ADAM_EPSILON;
+ }
+ ((Adam)iu).setLearningRate(lr);
+ ((Adam)iu).setBeta1(on.get("adamMeanDecay").asDouble());
+ ((Adam)iu).setBeta2(on.get("adamVarDecay").asDouble());
+ ((Adam)iu).setEpsilon(eps);
+ break;
+ case ADAMAX:
+ if(Double.isNaN(eps)){
+ eps = AdaMax.DEFAULT_ADAMAX_EPSILON;
+ }
+ ((AdaMax)iu).setLearningRate(lr);
+ ((AdaMax)iu).setBeta1(on.get("adamMeanDecay").asDouble());
+ ((AdaMax)iu).setBeta2(on.get("adamVarDecay").asDouble());
+ ((AdaMax)iu).setEpsilon(eps);
+ break;
+ case ADADELTA:
+ if(Double.isNaN(eps)){
+ eps = AdaDelta.DEFAULT_ADADELTA_EPSILON;
+ }
+ ((AdaDelta)iu).setRho(rho);
+ ((AdaDelta)iu).setEpsilon(eps);
+ break;
+ case NESTEROVS:
+ ((Nesterovs)iu).setLearningRate(lr);
+ ((Nesterovs)iu).setMomentum(on.get("momentum").asDouble());
+ break;
+ case NADAM:
+ if(Double.isNaN(eps)){
+ eps = Nadam.DEFAULT_NADAM_EPSILON;
+ }
+ ((Nadam)iu).setLearningRate(lr);
+ ((Nadam)iu).setBeta1(on.get("adamMeanDecay").asDouble());
+ ((Nadam)iu).setBeta2(on.get("adamVarDecay").asDouble());
+ ((Nadam)iu).setEpsilon(eps);
+ break;
+ case ADAGRAD:
+ if(Double.isNaN(eps)){
+ eps = AdaGrad.DEFAULT_ADAGRAD_EPSILON;
+ }
+ ((AdaGrad)iu).setLearningRate(lr);
+ ((AdaGrad)iu).setEpsilon(eps);
+ break;
+ case RMSPROP:
+ if(Double.isNaN(eps)){
+ eps = RmsProp.DEFAULT_RMSPROP_EPSILON;
+ }
+ ((RmsProp)iu).setLearningRate(lr);
+ ((RmsProp)iu).setEpsilon(eps);
+ ((RmsProp)iu).setRmsDecay(on.get("rmsDecay").asDouble());
+ break;
+ default:
+ //No op
+ break;
+ }
- switch (u) {
- case SGD:
- bl.setIUpdater(new Sgd(lr));
- break;
- case ADAM:
- double meanDecay = bl.getAdamMeanDecay();
- double varDecay = bl.getAdamVarDecay();
- bl.setIUpdater(Adam.builder().learningRate(lr).beta1(meanDecay).beta2(varDecay).epsilon(eps)
- .build());
- break;
- case ADADELTA:
- bl.setIUpdater(new AdaDelta(rho, eps));
- break;
- case NESTEROVS:
- Map momentumSchedule = bl.getMomentumSchedule();
- double momentum = bl.getMomentum();
- bl.setIUpdater(new Nesterovs(lr, momentum, momentumSchedule));
- break;
- case ADAGRAD:
- bl.setIUpdater(new AdaGrad(lr, eps));
- break;
- case RMSPROP:
- double rmsDecay = bl.getRmsDecay();
- bl.setIUpdater(new RmsProp(lr, rmsDecay, eps));
- break;
- case NONE:
- bl.setIUpdater(new NoOp());
- break;
- case CUSTOM:
- //No op - shouldn't happen
- break;
+ layer.setIUpdater(iu);
}
}
}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java
index 8f094d64e029..26efa9f72091 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java
@@ -1,16 +1,24 @@
package org.deeplearning4j.nn.conf.serde;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.graph.GraphVertex;
import org.deeplearning4j.nn.conf.graph.LayerVertex;
+import org.deeplearning4j.nn.conf.layers.BaseLayer;
import org.deeplearning4j.nn.conf.layers.Layer;
+import org.deeplearning4j.nn.conf.weightnoise.DropConnect;
+import org.nd4j.shade.jackson.core.JsonLocation;
import org.nd4j.shade.jackson.core.JsonParser;
-import org.nd4j.shade.jackson.core.JsonProcessingException;
import org.nd4j.shade.jackson.databind.DeserializationContext;
import org.nd4j.shade.jackson.databind.JsonDeserializer;
+import org.nd4j.shade.jackson.databind.JsonNode;
+import org.nd4j.shade.jackson.databind.ObjectMapper;
+import org.nd4j.shade.jackson.databind.node.ObjectNode;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -23,10 +31,11 @@ public ComputationGraphConfigurationDeserializer(JsonDeserializer<?> defaultDese
}
@Override
- public ComputationGraphConfiguration deserialize(JsonParser jp, DeserializationContext ctxt)
- throws IOException, JsonProcessingException {
+ public ComputationGraphConfiguration deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException {
+ long charOffsetStart = jp.getCurrentLocation().getCharOffset();
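+ //Record the start offset: if legacy updater/dropout fields are present, the raw JSON is re-read below, since those fields no longer exist on the layer classes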
ComputationGraphConfiguration conf = (ComputationGraphConfiguration) defaultDeserializer.deserialize(jp, ctxt);
+
//Updater configuration changed after 0.8.0 release
//Previously: enumerations and fields. Now: classes
//Here, we manually create the appropriate Updater instances, if the IUpdater field is empty
@@ -41,7 +50,56 @@ public ComputationGraphConfiguration deserialize(JsonParser jp, DeserializationC
}
Layer[] layers = layerList.toArray(new Layer[layerList.size()]);
- handleUpdaterBackwardCompatibility(layers);
+ //Now, check if we need to manually handle IUpdater deserialization from legacy format
+ boolean attemptIUpdaterFromLegacy = requiresIUpdaterFromLegacy(layers);
+
+ if(attemptIUpdaterFromLegacy) {
+ JsonLocation endLocation = jp.getCurrentLocation();
+ long charOffsetEnd = endLocation.getCharOffset();
+ String jsonSubString = endLocation.getSourceRef().toString().substring((int) charOffsetStart - 1, (int) charOffsetEnd);
+
+ ObjectMapper om = NeuralNetConfiguration.mapper();
+ JsonNode rootNode = om.readTree(jsonSubString);
+
+ ObjectNode verticesNode = (ObjectNode) rootNode.get("vertices");
+ Iterator<JsonNode> iter = verticesNode.elements();
+ int layerIdx = 0;
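+ //Note: this assumes the vertex iteration order matches the order in which layers[] was built above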
+ while(iter.hasNext()){
+ JsonNode next = iter.next();
+ ObjectNode confNode = null;
+ if(next.has("LayerVertex")){
+ next = next.get("LayerVertex");
+ if(next.has("layerConf")){
+ confNode = (ObjectNode) next.get("layerConf");
+ next = confNode.get("layer").elements().next();
+ } else {
+ continue;
+ }
+
+ if(layers[layerIdx] instanceof BaseLayer && ((BaseLayer)layers[layerIdx]).getIUpdater() == null){
+ handleUpdaterBackwardCompatibility((BaseLayer)layers[layerIdx], (ObjectNode)next);
+ }
+
+ if(layers[layerIdx].getIDropout() == null){
+ //Check for legacy dropout
+ if(next.has("dropOut")){
+ double d = next.get("dropOut").asDouble();
+ if(!Double.isNaN(d)){
+ //Might be dropout or dropconnect...
+ if(layers[layerIdx] instanceof BaseLayer && confNode.has("useDropConnect")
+ && confNode.get("useDropConnect").asBoolean(false)){
+ ((BaseLayer)layers[layerIdx]).setWeightNoise(new DropConnect(d));
+ } else {
+ layers[layerIdx].setIDropout(new Dropout(d));
+ }
+ }
+ }
+ }
+
+ layerIdx++;
+ }
+ }
+ }
return conf;
}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/MultiLayerConfigurationDeserializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/MultiLayerConfigurationDeserializer.java
index 6ec48654580e..e7e01ae4980b 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/MultiLayerConfigurationDeserializer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/MultiLayerConfigurationDeserializer.java
@@ -1,11 +1,19 @@
package org.deeplearning4j.nn.conf.serde;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
+import org.deeplearning4j.nn.conf.layers.BaseLayer;
import org.deeplearning4j.nn.conf.layers.Layer;
+import org.deeplearning4j.nn.conf.weightnoise.DropConnect;
+import org.nd4j.shade.jackson.core.JsonLocation;
import org.nd4j.shade.jackson.core.JsonParser;
-import org.nd4j.shade.jackson.core.JsonProcessingException;
import org.nd4j.shade.jackson.databind.DeserializationContext;
import org.nd4j.shade.jackson.databind.JsonDeserializer;
+import org.nd4j.shade.jackson.databind.JsonNode;
+import org.nd4j.shade.jackson.databind.ObjectMapper;
+import org.nd4j.shade.jackson.databind.node.ArrayNode;
+import org.nd4j.shade.jackson.databind.node.ObjectNode;
import java.io.IOException;
@@ -16,20 +24,63 @@ public MultiLayerConfigurationDeserializer(JsonDeserializer<?> defaultDeserializ
}
@Override
- public MultiLayerConfiguration deserialize(JsonParser jp, DeserializationContext ctxt)
- throws IOException, JsonProcessingException {
- MultiLayerConfiguration conf = (MultiLayerConfiguration) defaultDeserializer.deserialize(jp, ctxt);
-
- //Updater configuration changed after 0.8.0 release
- //Previously: enumerations and fields. Now: classes
- //Here, we manually create the appropriate Updater instances, if the IUpdater field is empty
+ public MultiLayerConfiguration deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException {
+ long charOffsetStart = jp.getCurrentLocation().getCharOffset();
+ MultiLayerConfiguration conf = (MultiLayerConfiguration) defaultDeserializer.deserialize(jp, ctxt);
Layer[] layers = new Layer[conf.getConfs().size()];
for (int i = 0; i < layers.length; i++) {
layers[i] = conf.getConf(i).getLayer();
}
- handleUpdaterBackwardCompatibility(layers);
+ //Now, check if we need to manually handle IUpdater deserialization from legacy format
+ boolean attemptIUpdaterFromLegacy = requiresIUpdaterFromLegacy(layers);
+
+
+ if(attemptIUpdaterFromLegacy) {
+ JsonLocation endLocation = jp.getCurrentLocation();
+ long charOffsetEnd = endLocation.getCharOffset();
+ String jsonSubString = endLocation.getSourceRef().toString().substring((int) charOffsetStart - 1, (int) charOffsetEnd);
+
+ ObjectMapper om = NeuralNetConfiguration.mapper();
+ JsonNode rootNode = om.readTree(jsonSubString);
+
+ ArrayNode confsNode = (ArrayNode)rootNode.get("confs");
+
+ for( int i=0; i<confsNode.size(); i++ ){
+ ObjectNode on = (ObjectNode) confsNode.get(i);
+ ObjectNode confNode = null;
+ if(layers[i] instanceof BaseLayer && ((BaseLayer)layers[i]).getIUpdater() == null){
+ //layer -> (first/only child) -> updater
+ if(on.has("layer")){
+ confNode = on;
+ on = (ObjectNode) on.get("layer");
+ } else {
+ continue;
+ }
+ on = (ObjectNode) on.elements().next();
+
+ handleUpdaterBackwardCompatibility((BaseLayer)layers[i], on);
+ }
+
+ if(layers[i].getIDropout() == null){
+ //Check for legacy dropout/dropconnect
+ if(on.has("dropOut")){
+ double d = on.get("dropOut").asDouble();
+ if(!Double.isNaN(d)){
+ //Might be dropout or dropconnect...
+ if(confNode != null && layers[i] instanceof BaseLayer && confNode.has("useDropConnect")
+ && confNode.get("useDropConnect").asBoolean(false)){
+ ((BaseLayer)layers[i]).setWeightNoise(new DropConnect(d));
+ } else {
+ layers[i].setIDropout(new Dropout(d));
+ }
+ }
+ }
+ }
+ }
+ }
+
return conf;
}
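For readers following the legacy-format handling above, here is a minimal round-trip sketch of what this deserializer enables. The JSON string and the `loadLegacyJson()` helper are hypothetical placeholders; only `MultiLayerConfiguration.fromJson(String)` is an existing API.

```java
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;

public class LegacyConfigLoadSketch {
    public static void main(String[] args) {
        // legacyJson stands in for a configuration serialized by a pre-0.9 release,
        // where dropout was a plain "dropOut" double and DropConnect was flagged
        // via "useDropConnect", rather than the new class-based fields.
        String legacyJson = loadLegacyJson(); // hypothetical helper

        // fromJson() routes through MultiLayerConfigurationDeserializer above: it
        // re-parses the raw JSON tree and, where needed, converts the legacy
        // "dropOut"/"useDropConnect" values into Dropout / DropConnect instances.
        MultiLayerConfiguration conf = MultiLayerConfiguration.fromJson(legacyJson);
        System.out.println(conf.getConf(0).getLayer());
    }

    private static String loadLegacyJson() {
        // Placeholder - in practice, read the JSON saved by the older release
        return "{...}";
    }
}
```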
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java
new file mode 100644
index 000000000000..876ed509453c
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java
@@ -0,0 +1,87 @@
+package org.deeplearning4j.nn.conf.weightnoise;
+
+import lombok.Data;
+import org.deeplearning4j.nn.api.Layer;
+import org.deeplearning4j.nn.api.ParamInitializer;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.ops.random.impl.DropOut;
+import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.schedule.ISchedule;
+import org.nd4j.shade.jackson.annotation.JsonProperty;
+
+/**
+ * DropConnect, based on Wan et al. 2013 - "Regularization of Neural Networks using DropConnect"
+ * Sets weights randomly to 0 with some probability, or leaves them unchanged.
+ *
+ * @author Alex Black
+ */
+@Data
+public class DropConnect implements IWeightNoise {
+
+ private double weightRetainProb;
+ private ISchedule weightRetainProbSchedule;
+ private boolean applyToBiases;
+
+ /**
+ * @param weightRetainProbability Probability of retaining a weight
+ */
+ public DropConnect(double weightRetainProbability) {
+ this(weightRetainProbability, false);
+ }
+
+ /**
+ * @param weightRetainProbability Probability of retaining a weight
+ * @param applyToBiases If true: apply to biases (default: weights only)
+ */
+ public DropConnect(double weightRetainProbability, boolean applyToBiases) {
+ this(weightRetainProbability, null, applyToBiases);
+ }
+
+ /**
+ * @param weightRetainProbSchedule Probability (schedule) of retaining a weight
+ */
+ public DropConnect(ISchedule weightRetainProbSchedule){
+ this(Double.NaN, weightRetainProbSchedule, false);
+ }
+
+ /**
+ * @param weightRetainProbSchedule Probability (schedule) of retaining a weight
+ * @param applyToBiases If true: apply to biases (default: weights only)
+ */
+ public DropConnect(ISchedule weightRetainProbSchedule, boolean applyToBiases){
+ this(Double.NaN, weightRetainProbSchedule, applyToBiases);
+ }
+
+ private DropConnect(@JsonProperty("weightRetainProbability") double weightRetainProbability,
+ @JsonProperty("weightRetainProbSchedule") ISchedule weightRetainProbSchedule,
+ @JsonProperty("applyToBiases") boolean applyToBiases) {
+ this.weightRetainProb = weightRetainProbability;
+ this.weightRetainProbSchedule = weightRetainProbSchedule;
+ this.applyToBiases = applyToBiases;
+ }
+
+ @Override
+ public INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train) {
+ ParamInitializer init = layer.conf().getLayer().initializer();
+ INDArray param = layer.getParam(paramKey);
+
+ double p;
+ if(weightRetainProbSchedule == null){
+ p = weightRetainProb;
+ } else {
+ p = weightRetainProbSchedule.valueAt(iteration, epoch);
+ }
+
+ if (train && (init.isWeightParam(paramKey) || (applyToBiases && init.isBiasParam(paramKey)))) {
+ INDArray out = Nd4j.createUninitialized(param.shape(), param.ordering());
+ Nd4j.getExecutioner().exec(new DropOut(param, out, p));
+ return out;
+ }
+ return param;
+ }
+
+ @Override
+ public DropConnect clone() {
+ return new DropConnect(weightRetainProb, weightRetainProbSchedule, applyToBiases);
+ }
+}
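A brief usage sketch for the new class. This assumes the `weightNoise(IWeightNoise)` builder setter added elsewhere in this change; retaining each weight with probability 0.9 means roughly 10% of weights are zeroed per training iteration, while at test time `getParameter()` returns the unmodified weights.

```java
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.weightnoise.DropConnect;

public class DropConnectSketch {
    public static void main(String[] args) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .weightNoise(new DropConnect(0.9)) // assumed builder setter from this PR
                .list()
                .layer(0, new DenseLayer.Builder().nIn(784).nOut(100).build())
                .layer(1, new OutputLayer.Builder().nIn(100).nOut(10).build())
                .build();
        System.out.println(conf.toJson());
    }
}
```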
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java
new file mode 100644
index 000000000000..fd8c7dc83dfd
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java
@@ -0,0 +1,36 @@
+package org.deeplearning4j.nn.conf.weightnoise;
+
+import org.deeplearning4j.nn.api.Layer;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.shade.jackson.annotation.JsonTypeInfo;
+
+import java.io.Serializable;
+
+/**
+ * IWeightNoise instances operate on one or more weight arrays, modifying their values at training time
+ * or test time, before they are used. Note that the weights are copied before being modified, so the
+ * original parameters are not changed. However, if no modification is applied, the original array is returned.
+ *
+ * This interface can be used to implement functionality like DropConnect, weight quantization and weight
+ * noise.
+ *
+ * @author Alex Black
+ */
+@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class")
+public interface IWeightNoise extends Serializable, Cloneable{
+
+ /**
+ * Get the parameter, after applying weight noise
+ *
+ * @param layer Layer to get the parameter for
+ * @param paramKey Parameter key
+ * @param iteration Iteration number
+ * @param epoch Epoch number
+ * @param train If true: training. False: at test time
+ * @return Parameter, after applying weight noise
+ */
+ INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train);
+
+ IWeightNoise clone();
+
+}
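Since the javadoc names weight quantization as a possible use, here is a minimal custom implementation sketch. It is hypothetical and not part of this change; Jackson annotations for JSON round-tripping are omitted for brevity.

```java
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.ops.transforms.Transforms;

// Rounds weights to the nearest multiple of 'step' at training time,
// illustrating the "weight quantization" use case named in the javadoc.
public class QuantizeWeights implements IWeightNoise {

    private final double step;

    public QuantizeWeights(double step) {
        this.step = step;
    }

    @Override
    public INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train) {
        INDArray param = layer.getParam(paramKey);
        if (!train) {
            return param; // contract: return the original array when unmodified
        }
        INDArray out = param.div(step);   // div() returns a copy; original weights untouched
        Transforms.round(out, false);     // round the copy in place
        return out.muli(step);            // scale back to multiples of 'step'
    }

    @Override
    public QuantizeWeights clone() {
        return new QuantizeWeights(step);
    }
}
```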
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java
new file mode 100644
index 000000000000..a1d66a79e10f
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java
@@ -0,0 +1,83 @@
+package org.deeplearning4j.nn.conf.weightnoise;
+
+import lombok.Data;
+import org.deeplearning4j.nn.api.Layer;
+import org.deeplearning4j.nn.api.ParamInitializer;
+import org.deeplearning4j.nn.conf.distribution.Distribution;
+import org.deeplearning4j.nn.conf.distribution.Distributions;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.ops.impl.transforms.arithmetic.AddOp;
+import org.nd4j.linalg.api.ops.impl.transforms.arithmetic.MulOp;
+import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.shade.jackson.annotation.JsonProperty;
+
+/**
+ * Apply noise of the specified distribution to the weights at training time.
+ * Note that both additive and multiplicative modes are supported - when additive, noise should be mean 0,
+ * when multiplicative, noise should be mean 1.
+ * That is, additive noise: x = x + noise
+ * multiplicative noise: x = x * noise
+ *
+ * @author Alex Black
+ */
+@Data
+public class WeightNoise implements IWeightNoise {
+
+ private Distribution distribution;
+ private boolean applyToBias;
+ private boolean additive;
+
+ /**
+ * @param distribution Distribution for additive noise
+ */
+ public WeightNoise(Distribution distribution) {
+ this(distribution, false, true);
+ }
+
+ /**
+ * @param distribution Distribution for noise
+ * @param additive If true: noise is added to weights. If false: noise is multiplied by weights
+ */
+ public WeightNoise(Distribution distribution, boolean additive) {
+ this(distribution, false, additive);
+ }
+
+ /**
+ * @param distribution Distribution for noise
+ * @param applyToBias If true: apply to biases also. If false (default): apply only to weights
+ * @param additive If true: noise is added to weights. If false: noise is multiplied by weights
+ */
+ public WeightNoise(@JsonProperty("distribution") Distribution distribution,
+ @JsonProperty("applyToBias") boolean applyToBias,
+ @JsonProperty("additive") boolean additive) {
+ this.distribution = distribution;
+ this.applyToBias = applyToBias;
+ this.additive = additive;
+ }
+
+ @Override
+ public INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train) {
+
+ ParamInitializer init = layer.conf().getLayer().initializer();
+ INDArray param = layer.getParam(paramKey);
+ if (train && (init.isWeightParam(paramKey) || (applyToBias && init.isBiasParam(paramKey)))) {
+
+ org.nd4j.linalg.api.rng.distribution.Distribution dist = Distributions.createDistribution(distribution);
+ INDArray noise = dist.sample(param.shape());
+ INDArray out = Nd4j.createUninitialized(param.shape(), param.ordering());
+
+ if (additive) {
+ Nd4j.getExecutioner().exec(new AddOp(param, noise, out));
+ } else {
+ Nd4j.getExecutioner().exec(new MulOp(param, noise, out));
+ }
+ return out;
+ }
+ return param;
+ }
+
+ @Override
+ public WeightNoise clone() {
+ return new WeightNoise(distribution, applyToBias, additive);
+ }
+}
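A short configuration sketch contrasting the two modes, using the existing org.deeplearning4j.nn.conf.distribution.NormalDistribution. The mean-0/mean-1 choice follows the javadoc above so that the expected value of each weight is unchanged.

```java
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.weightnoise.WeightNoise;

public class WeightNoiseModesSketch {
    public static void main(String[] args) {
        // Additive: noise ~ N(0, 0.01), so E[w + noise] = w
        WeightNoise additive = new WeightNoise(new NormalDistribution(0, 0.01), true);

        // Multiplicative: noise ~ N(1, 0.05), so E[w * noise] = w
        WeightNoise multiplicative = new WeightNoise(new NormalDistribution(1, 0.05), false);

        System.out.println(additive + "\n" + multiplicative);
    }
}
```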
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java
index d2c315080abc..c41498700ee5 100755
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java
@@ -20,6 +20,7 @@
import lombok.Getter;
import lombok.Setter;
+import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.deeplearning4j.datasets.iterator.AsyncDataSetIterator;
@@ -72,8 +73,6 @@
import org.nd4j.linalg.memory.abstracts.DummyWorkspace;
import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.primitives.Triple;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.*;
@@ -84,10 +83,9 @@
*
* @author Alex Black
*/
+@Slf4j
public class ComputationGraph implements Serializable, Model, NeuralNetwork {
- private static final Logger log = LoggerFactory.getLogger(ComputationGraph.class);
-
protected ComputationGraphConfiguration configuration;
protected boolean initCalled = false;
protected transient Solver solver; //Used to call optimizers during backprop
@@ -598,6 +596,7 @@ public void init(INDArray parameters, boolean cloneParametersArray) {
}
}
+ synchronizeIterEpochCounts();
initCalled = true;
}
@@ -881,6 +880,10 @@ public void fit(DataSetIterator iterator) {
} else
dataSetIterator = iterator;
+ if(!iterator.hasNext() && iterator.resetSupported()){
+ iterator.reset();
+ }
+
if (trainingListeners.size() > 0) {
for (TrainingListener tl : trainingListeners) {
tl.onEpochStart(this);
@@ -1293,6 +1296,7 @@ public int[] topologicalSortOrder() {
@Override
public void computeGradientAndScore() {
+ synchronizeIterEpochCounts();
//Calculate activations (which are stored in each layer, and used in backprop)
if (configuration.getBackpropType() == BackpropType.TruncatedBPTT) {
Map<String, INDArray> activations = rnnActivateUsingStoredState(inputs, true, true);
@@ -1339,6 +1343,11 @@ public void computeGradientAndScore() {
}
}
}
+
+ //Clear the fields (inc. post noise/dropconnect parameters) on the output layers
+ for( int i=0; i<numOutputArrays; i++ ){
+ getOutputLayer(i).clearNoiseWeightParams();
+ }
@@ ... @@
* @param <T> Type of the IEvaluation instance
* @return The input IEvaluation instance, after performing evaluation on the test data
*/
@@ -3177,7 +3181,6 @@ public String summary(InputType... inputTypes) {
String in = "-";
String out = "-";
String paramShape = "-";
-
if (currentVertex.isInputVertex()) {
if (inputTypes != null) vertexOutputs.put(currentVertexName, inputTypes[configuration.getNetworkInputs().indexOf(currentVertexName)]); //for input vertices the outputs are just the input types (only layer vertices have preprocessing?)
} else {
@@ -3193,7 +3196,7 @@ public String summary(InputType... inputTypes) {
paramShape = "";
in = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNIn());
out = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNOut());
- Set<String> paraNames = currentLayer.conf().getLearningRateByParam().keySet();
+ List<String> paraNames = currentLayer.conf().variables();
for (String aP : paraNames) {
String paramS = ArrayUtils.toString(currentLayer.paramTable().get(aP).shape());
paramShape += aP + ":" + paramS + ", ";
@@ -3288,6 +3291,16 @@ public void incrementEpochCount(){
configuration.setEpochCount(configuration.getEpochCount() + 1);
}
+ protected void synchronizeIterEpochCounts(){
+ //TODO: this is necessary for some schedules - but the redundant values are a little ugly...
+ int currIter = getConfiguration().getIterationCount();
+ int currEpoch = getConfiguration().getEpochCount();
+ for(Layer l : layers){
+ l.setIterationCount(currIter);
+ l.setEpochCount(currEpoch);
+ }
+ }
+
/**
* Indicates whether some other object is "equal to" this one.
*
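The synchronizeIterEpochCounts() addition above matters for schedule-driven noise such as the DropConnect(ISchedule) constructor introduced earlier in this change. A sketch, assuming the MapSchedule implementation from the same org.nd4j.linalg.schedule package:

```java
import org.deeplearning4j.nn.conf.weightnoise.DropConnect;
import org.nd4j.linalg.schedule.ISchedule;
import org.nd4j.linalg.schedule.MapSchedule;
import org.nd4j.linalg.schedule.ScheduleType;

public class ScheduledDropConnectSketch {
    public static void main(String[] args) {
        // Retain probability decays by epoch. valueAt(iteration, epoch) is called
        // per layer, which is why each layer's iteration/epoch counters must be
        // kept in sync with the network configuration's counters.
        ISchedule schedule = new MapSchedule.Builder(ScheduleType.EPOCH)
                .add(0, 0.95)   // epochs 0-9: retain 95% of weights
                .add(10, 0.85)  // epoch 10 onwards: retain 85%
                .build();
        DropConnect dropConnect = new DropConnect(schedule);
        System.out.println(dropConnect);
    }
}
```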
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java
index 15ec394888f2..e5e1234399b9 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java
@@ -18,11 +18,11 @@
package org.deeplearning4j.nn.graph.vertex;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.io.Serializable;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ElementWiseVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ElementWiseVertex.java
index 67d1fd572660..7d8def6fcf66 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ElementWiseVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ElementWiseVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -28,6 +27,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.transforms.Or;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
/** An ElementWiseVertex is used to combine the activations of two or more layer in an element-wise manner
* For example, the activations may be combined by addition, subtraction or multiplication.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/InputVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/InputVertex.java
index 4796c96546a9..b08e04020f43 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/InputVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/InputVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -26,6 +25,7 @@
import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex;
import org.deeplearning4j.nn.graph.vertex.VertexIndices;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/** An InputVertex simply defines the location (and connection structure) of inputs to the ComputationGraph.
* It does not define forward or backward methods.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2NormalizeVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2NormalizeVertex.java
index 63c5ccef577c..fdc2e3f0ef79 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2NormalizeVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2NormalizeVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -30,6 +29,7 @@
import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Pair;
/**
* L2NormalizeVertex performs L2 normalization on a single input.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2Vertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2Vertex.java
index d260136807bf..79fc7d405e91 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2Vertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2Vertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -30,6 +29,7 @@
import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Pair;
/**
* L2Vertex calculates the L2 least squares error of two inputs.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java
index 668368f5ec2b..da12c499d8c8 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java
@@ -20,7 +20,6 @@
import lombok.Data;
import lombok.EqualsAndHashCode;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.api.layers.IOutputLayer;
@@ -33,6 +32,7 @@
import org.deeplearning4j.nn.layers.BaseOutputLayer;
import org.deeplearning4j.nn.layers.FrozenLayer;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/MergeVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/MergeVertex.java
index 571b18f2d891..e1d3aff9123f 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/MergeVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/MergeVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -29,6 +28,7 @@
import org.nd4j.linalg.api.ops.impl.transforms.Or;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PoolHelperVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PoolHelperVertex.java
index 7a3f95b11f40..cd818461ce28 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PoolHelperVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PoolHelperVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -29,6 +28,7 @@
import org.nd4j.linalg.api.ops.impl.transforms.Or;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.primitives.Pair;
/**
* A custom layer for removing the first column and row from an input. This is meant to allow
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PreprocessorVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PreprocessorVertex.java
index 692342111621..25f821c01c00 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PreprocessorVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PreprocessorVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.InputPreProcessor;
@@ -27,6 +26,7 @@
import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex;
import org.deeplearning4j.nn.graph.vertex.VertexIndices;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/** PreprocessorVertex is a simple adaptor class that allows a {@link InputPreProcessor} to be used in a ComputationGraph
* GraphVertex, without it being associated with a layer.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ReshapeVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ReshapeVertex.java
index a3c75160e87e..fb6f346f9f09 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ReshapeVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ReshapeVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -26,8 +25,7 @@
import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex;
import org.deeplearning4j.nn.graph.vertex.VertexIndices;
import org.nd4j.linalg.api.ndarray.INDArray;
-import org.nd4j.linalg.api.ops.impl.transforms.Or;
-import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
/**
* Adds the ability to reshape and flatten the tensor in the computation graph. This is the equivalent
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ScaleVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ScaleVertex.java
index afc495365d37..b4ae622a8eee 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ScaleVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ScaleVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -26,6 +25,7 @@
import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex;
import org.deeplearning4j.nn.graph.vertex.VertexIndices;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/**
* A ScaleVertex is used to scale the size of activations of a single layer
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ShiftVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ShiftVertex.java
index 0addc6bff2d7..1ad40b3e2bf0 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ShiftVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ShiftVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -26,6 +25,7 @@
import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex;
import org.deeplearning4j.nn.graph.vertex.VertexIndices;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/**
* A ShiftVertex is used to shift the activations of a single layer
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/StackVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/StackVertex.java
index 2e506203aabb..526686fe100d 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/StackVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/StackVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -29,6 +28,7 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.primitives.Pair;
/**
* StackVertex allows for stacking of inputs so that they may be forwarded through
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/SubsetVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/SubsetVertex.java
index 4d8a45297655..d49953f0ac76 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/SubsetVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/SubsetVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -29,6 +28,7 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/UnstackVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/UnstackVertex.java
index 09c63ad7e4b4..f203ae2462a4 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/UnstackVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/UnstackVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -29,6 +28,7 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java
index c1b1bd0a4c6c..783f8c8299b4 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl.rnn;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -29,6 +28,7 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.primitives.Pair;
/**DuplicateToTimeSeriesVertex is a vertex that goes from 2d activations to a 3d time series activations, by means of
* duplication. That is, given a 2d input with shape [numExamples,nIn] duplicate each row to give output of
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java
index 75c23f2cd0c1..856b631c55ef 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.graph.vertex.impl.rnn;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -29,6 +28,7 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.primitives.Pair;
/** LastTimeStepVertex is used in the context of recurrent neural network activations, to go from 3d (time series)
* activations to 2d activations, by extracting out the last time step of activations for each example.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java
index f4bd6a61bf4d..7a33fd96e11a 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java
@@ -20,20 +20,19 @@
import lombok.Data;
import lombok.NoArgsConstructor;
-import org.deeplearning4j.nn.api.layers.LayerConstraint;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
+import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.optimize.api.ConvexOptimizer;
import org.deeplearning4j.optimize.api.IterationListener;
-import org.deeplearning4j.util.Dropout;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.util.*;
@@ -54,6 +53,9 @@ public abstract class AbstractLayer<LayerConfT extends org.deeplearning4j.nn.conf.layers.Layer> implements Layer
@@ ... @@
- protected void applyDropOutIfNecessary(boolean training) {
- if (layerConf().getDropOut() > 0 && !conf.isUseDropConnect() && training && !dropoutApplied) {
+ protected void applyDropOutIfNecessary(boolean training){//} int iteration, int epoch) {
+ if(training && !dropoutApplied && layerConf().getIDropout() != null ){
+ //TODO: Epoch + iteration counters...
if (Nd4j.getWorkspaceManager().checkIfWorkspaceExists(ComputationGraph.workspaceExternal)) {
try (MemoryWorkspace ws = Nd4j.getWorkspaceManager()
- .getWorkspaceForCurrentThread(ComputationGraph.workspaceExternal)
- .notifyScopeBorrowed()) {
- input = input.isView() ? input.dup() : input.unsafeDuplication();
+ .getWorkspaceForCurrentThread(ComputationGraph.workspaceExternal)
+ .notifyScopeBorrowed()) {
+ input = layerConf().getIDropout().applyDropout(input, getIterationCount(), getEpochCount(), false);
}
- } else
- input = input.isView() ? input.dup() : input.unsafeDuplication();
-
- Dropout.applyDropout(input, layerConf().getDropOut());
+ } else {
+ input = layerConf().getIDropout().applyDropout(input, getIterationCount(), getEpochCount(), false);
+ }
dropoutApplied = true;
}
}
- /**
- * Averages the given logistic regression from a mini batch into this layer
- * @param l the logistic regression layer to average into this layer
- * @param batchSize the batch size
- */
- @Override
- public void merge(Layer l, int batchSize) {
- throw new UnsupportedOperationException();
- }
-
@Override
public Type type() {
return Type.FEED_FORWARD;
@@ -435,11 +411,6 @@ public int getInputMiniBatchSize() {
return input.size(0);
}
- @Override
- public void applyLearningRateScoreDecay() {
- throw new UnsupportedOperationException("Not supported");
- }
-
@Override
public void setMaskArray(INDArray maskArray) {
this.maskArray = maskArray;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java
index d4fb2fe6b60f..6fbf9eff051e 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java
@@ -19,12 +19,12 @@
package org.deeplearning4j.nn.layers;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/**
@@ -109,20 +109,9 @@ public boolean isPretrainLayer() {
return false;
}
-
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray indArray) {
- throw new UnsupportedOperationException("Not supported - " + layerId());
- }
-
- @Override
- public void merge(Layer layer, int batchSize) {
- throw new UnsupportedOperationException("Not supported - " + layerId());
- }
-
@Override
- public INDArray activationMean() {
- return activate(false);
+ public void clearNoiseWeightParams() {
+ //No op
}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java
index b261509ac71d..c52d43091392 100755
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.layers;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
@@ -28,10 +27,11 @@
import org.deeplearning4j.nn.params.PretrainParamInitializer;
import org.deeplearning4j.optimize.Solver;
import org.deeplearning4j.optimize.api.ConvexOptimizer;
-import org.deeplearning4j.util.Dropout;
+import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.primitives.Pair;
import java.lang.reflect.Constructor;
import java.util.*;
@@ -52,6 +52,8 @@ public abstract class BaseLayer<LayerConfT extends org.deeplearning4j.nn.conf.layers.BaseLayer>
+ protected Map<String, INDArray> weightNoiseParams = new HashMap<>();
+
public BaseLayer(NeuralNetConfiguration conf) {
super(conf);
}
@@ -65,29 +67,6 @@ public LayerConfT layerConf() {
return (LayerConfT) this.conf.getLayer();
}
- @Override
- public Gradient error(INDArray errorSignal) {
- INDArray W = getParam(DefaultParamInitializer.WEIGHT_KEY);
- Gradient nextLayerGradient = new DefaultGradient();
- INDArray wErrorSignal = errorSignal.mmul(W.transpose());
- nextLayerGradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, wErrorSignal);
- return nextLayerGradient;
- }
-
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray activation) {
- Gradient ret = new DefaultGradient();
- INDArray weightErrorSignal = layerError.getGradientFor(DefaultParamInitializer.WEIGHT_KEY);
- INDArray weightError = weightErrorSignal.transpose().mmul(activation).transpose();
- ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightError);
- if(hasBias()){
- INDArray biasGradient = weightError.mean(0);
- ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradient);
- }
-
- return ret;
- }
-
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
//If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
@@ -113,7 +92,11 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad);
}
- INDArray epsilonNext = params.get(DefaultParamInitializer.WEIGHT_KEY).mmul(delta.transpose()).transpose();
+ INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true);
+
+ INDArray epsilonNext = W.mmul(delta.transpose()).transpose();
+
+ weightNoiseParams.clear();
return new Pair<>(ret, epsilonNext);
}
@@ -173,7 +156,6 @@ public Gradient gradient() {
*/
@Override
public void iterate(INDArray input) {
- setInput(input.dup());
applyDropOutIfNecessary(true);
Gradient gradient = gradient();
for (String paramType : gradient.gradientForVariable().keySet()) {
@@ -296,10 +278,44 @@ public Map<String, INDArray> paramTable(boolean backpropParamsOnly) {
return params;
}
+ /**
+ * Get the parameter, after applying any weight noise (such as DropConnect) if necessary.
+ * Note that during training, this will store the post-noise parameters, as these should be used
+ * for both forward pass and backprop, for a single iteration.
+ * Consequently, the parameters (post noise) should be cleared after each training iteration.
+ *
+ * @param param Parameter key
+ * @param training If true: during training
+ * @return The parameter, after applying any noise
+ */
+ protected INDArray getParamWithNoise(String param, boolean training){
+ INDArray p;
+ if(layerConf().getWeightNoise() != null){
+ if(training && weightNoiseParams.containsKey(param)){
+ //Re-use these weights for both forward pass and backprop - don't want to use 2 different params here
+ //These should be cleared during backprop
+ return weightNoiseParams.get(param);
+ } else {
+ try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) {
+ p = layerConf().getWeightNoise().getParameter(this, param, getIterationCount(), getEpochCount(), training);
+ }
+ }
+
+ if(training){
+ //Store for re-use in backprop
+ weightNoiseParams.put(param, p);
+ }
+ } else {
+ return getParam(param);
+ }
+
+ return p;
+ }
+
public INDArray preOutput(boolean training) {
applyDropOutIfNecessary(training);
- INDArray b = getParam(DefaultParamInitializer.BIAS_KEY);
- INDArray W = getParam(DefaultParamInitializer.WEIGHT_KEY);
+ INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training);
+ INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training);
//Input validation:
if (input.rank() != 2 || input.columns() != W.rows()) {
@@ -314,9 +330,6 @@ public INDArray preOutput(boolean training) {
+ W.size(0) + ") " + layerId());
}
- if (conf.isUseDropConnect() && training && layerConf().getDropOut() > 0) {
- W = Dropout.applyDropConnect(this, DefaultParamInitializer.WEIGHT_KEY);
- }
INDArray ret = input.mmul(W);
if(hasBias()){
@@ -371,29 +384,6 @@ public double calcL1(boolean backpropParamsOnly) {
return l1Sum;
}
-
- @Override
- public INDArray activationMean() {
- INDArray b = getParam(DefaultParamInitializer.BIAS_KEY);
- INDArray W = getParam(DefaultParamInitializer.WEIGHT_KEY);
- INDArray ret = input().mmul(W);
- if(hasBias()){
- ret.addiRowVector(b);
- }
- return ret;
- }
-
- /**
- * Averages the given logistic regression from a mini batch into this layer
- * @param l the logistic regression layer to average into this layer
- * @param batchSize the batch size
- */
- @Override
- public void merge(Layer l, int batchSize) {
- setParams(params().addi(l.params().divi(batchSize)));
- computeGradientAndScore();
- }
-
@Override
public Layer clone() {
Layer layer = null;
@@ -429,7 +419,7 @@ public int numParams() {
@Override
public void fit(INDArray input) {
if (input != null) {
- setInput(input.dup());
+ setInput(input);
applyDropOutIfNecessary(true);
}
if (solver == null) {
@@ -499,10 +489,14 @@ public void accumulateScore(double accum) {
}
@Override
- public void applyLearningRateScoreDecay() {
- for (Map.Entry<String, Double> lrPair : conf.getLearningRateByParam().entrySet())
- conf.setLearningRateByParam(lrPair.getKey(),
- lrPair.getValue() * (conf.getLrPolicyDecayRate() + Nd4j.EPS_THRESHOLD));
+ public void clear(){
+ super.clear();
+ weightNoiseParams.clear();
+ }
+
+ @Override
+ public void clearNoiseWeightParams(){
+ weightNoiseParams.clear();
}
/**
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java
index eeb79e8ce2a3..11ce37469be6 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.layers;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.api.Updater;
@@ -33,6 +32,7 @@
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.ILossFunction;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.util.FeatureUtil;
import java.io.Serializable;
@@ -148,7 +148,11 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
Pair<Gradient, INDArray> pair = getGradientsAndDelta(preOutput2d(true)); //Returns Gradient and delta^(this), not Gradient and epsilon^(this-1)
INDArray delta = pair.getSecond();
- INDArray epsilonNext = params.get(DefaultParamInitializer.WEIGHT_KEY).mmul(delta.transpose()).transpose();
+ INDArray epsilonNext = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true).mmul(delta.transpose()).transpose();
+
+ //Normally we would clear weightNoiseParams here - but we want to reuse them for forward + backward + score
+ // So this is instead done in MultiLayerNetwork/CompGraph backprop methods
+
return new Pair<>(pair.getFirst(), epsilonNext);
}
@@ -333,7 +337,7 @@ public void fit(INDArray input, INDArray labels) {
int updaterStateSize = 0;
Map<String, INDArray> paramTable = paramTable();
for (Map.Entry<String, INDArray> entry : paramTable.entrySet()) {
- updaterStateSize += (int) conf().getLayer().getIUpdaterByParam(entry.getKey())
+ updaterStateSize += (int) conf().getLayer().getUpdaterByParam(entry.getKey())
.stateSize(entry.getValue().length());
}
if (updaterStateSize > 0)
@@ -369,10 +373,7 @@ public void fit(INDArray examples, int[] labels) {
@Override
public void clear() {
super.clear();
- if (labels != null) {
- labels.data().destroy();
- labels = null;
- }
+ labels = null;
solver = null;
}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BasePretrainNetwork.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BasePretrainNetwork.java
index 4cf35461cac0..cd0d92459470 100755
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BasePretrainNetwork.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BasePretrainNetwork.java
@@ -19,7 +19,6 @@
package org.deeplearning4j.nn.layers;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -29,6 +28,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.ILossFunction;
+import org.nd4j.linalg.primitives.Pair;
import java.util.*;
@@ -212,6 +212,9 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
INDArray vBiasGradient = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY);
result.getFirst().gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, vBiasGradient);
vBiasGradient.assign(0);
+
+ weightNoiseParams.clear();
+
return result;
}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java
index 6dc8355da238..60744dfc8af2 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java
@@ -1,11 +1,11 @@
package org.deeplearning4j.nn.layers;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/**
* Created by davekale on 12/7/16.
@@ -55,7 +55,7 @@ public INDArray preOutput(boolean training) {
if (input == null) {
throw new IllegalArgumentException("Cannot perform forward pass with null input " + layerId());
}
- applyDropOutIfNecessary(training); //Dups input if necessary
+ applyDropOutIfNecessary(training);
if (maskArray != null) {
input.muliColumnVector(maskArray);
@@ -80,17 +80,6 @@ public boolean isPretrainLayer() {
return false;
}
-
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray indArray) {
- throw new UnsupportedOperationException("Not supported " + layerId());
- }
-
- @Override
- public void merge(Layer layer, int batchSize) {
- throw new UnsupportedOperationException("Not supported - " + layerId());
- }
-
@Override
public INDArray params() {
return null;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java
index 0dd97c73b9b9..c690f3d14094 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java
@@ -1,7 +1,6 @@
package org.deeplearning4j.nn.layers;
import lombok.extern.slf4j.Slf4j;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.CacheMode;
@@ -12,6 +11,7 @@
import org.deeplearning4j.optimize.api.IterationListener;
import org.deeplearning4j.util.OneTimeLogger;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Collection;
import java.util.Map;
@@ -75,42 +75,12 @@ public Type type() {
return insideLayer.type();
}
- @Override
- public Gradient error(INDArray input) {
- if (!logGradient) {
- OneTimeLogger.info(log,
- "Gradients for the frozen layer are not set and will therefore will not be updated.Warning will be issued only once per instance");
- logGradient = true;
- }
- return zeroGradient;
- }
-
- @Override
- public INDArray derivativeActivation(INDArray input) {
- return insideLayer.derivativeActivation(input);
- }
-
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray indArray) {
- return zeroGradient;
- }
-
//FIXME
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
return new Pair<>(zeroGradient, null);
}
- @Override
- public void merge(Layer layer, int batchSize) {
- insideLayer.merge(layer, batchSize);
- }
-
- @Override
- public INDArray activationMean() {
- return insideLayer.activationMean();
- }
-
@Override
public INDArray preOutput(INDArray x) {
return insideLayer.preOutput(x);
@@ -281,11 +251,6 @@ public void setBackpropGradientsViewArray(INDArray gradients) {
//no-op
}
- @Override
- public void applyLearningRateScoreDecay() {
- insideLayer.applyLearningRateScoreDecay();
- }
-
@Override
public void fit(INDArray data) {
if (!logFit) {
@@ -409,6 +374,26 @@ public int getIndex() {
return insideLayer.getIndex();
}
+ @Override
+ public int getIterationCount() {
+ return insideLayer.getIterationCount();
+ }
+
+ @Override
+ public int getEpochCount() {
+ return insideLayer.getEpochCount();
+ }
+
+ @Override
+ public void setIterationCount(int iterationCount) {
+ insideLayer.setIterationCount(iterationCount);
+ }
+
+ @Override
+ public void setEpochCount(int epochCount) {
+ insideLayer.setEpochCount(epochCount);
+ }
+
@Override
public void setInput(INDArray input) {
insideLayer.setInput(input);
@@ -439,6 +424,11 @@ public boolean isPretrainLayer() {
return insideLayer.isPretrainLayer();
}
+ @Override
+ public void clearNoiseWeightParams() {
+ insideLayer.clearNoiseWeightParams();
+ }
+
@Override
public Pair<INDArray, MaskState> feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState,
int minibatchSize) {
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java
index 950a41246da6..d48a8347af89 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java
@@ -19,7 +19,6 @@
package org.deeplearning4j.nn.layers;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.layers.IOutputLayer;
@@ -32,6 +31,7 @@
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.ILossFunction;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.util.FeatureUtil;
import java.io.Serializable;
@@ -248,17 +248,6 @@ public boolean isPretrainLayer() {
return false;
}
-
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray indArray) {
- throw new UnsupportedOperationException("Not supported " + layerId());
- }
-
- @Override
- public void merge(Layer layer, int batchSize) {
- throw new UnsupportedOperationException("Not supported " + layerId());
- }
-
@Override
public INDArray params() {
return null;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java
index 1006c57a3609..192b02223a86 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java
@@ -1,10 +1,10 @@
package org.deeplearning4j.nn.layers.convolution;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionHelper.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionHelper.java
index 8003f4b1570f..39da18d08d2d 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionHelper.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionHelper.java
@@ -17,7 +17,6 @@
*/
package org.deeplearning4j.nn.layers.convolution;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer.AlgoMode;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer.BwdDataAlgo;
@@ -26,6 +25,7 @@
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/**
* Helper for the convolution layer.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java
index 24bf0c8cc2d4..daf414d43151 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.layers.convolution;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.CacheMode;
@@ -30,13 +29,13 @@
import org.deeplearning4j.nn.layers.BaseLayer;
import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
import org.deeplearning4j.util.ConvolutionUtils;
-import org.deeplearning4j.util.Dropout;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.convolution.Convolution;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -120,7 +119,7 @@ public Type type() {
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
- INDArray weights = getParam(ConvolutionParamInitializer.WEIGHT_KEY);
+ INDArray weights = getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true);
int miniBatch = input.size(0);
int inH = input.size(2);
@@ -240,6 +239,8 @@ biasGradView, weightGradView, afn, layerConf().getCudnnAlgoMode(),
}
retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c');
+ weightNoiseParams.clear();
+
return new Pair<>(retGradient, epsNext);
}
@@ -267,11 +268,8 @@ public INDArray preOutput(boolean training) {
* @return Pair of arrays: preOutput (activations) and optionally the im2col2d array
*/
protected Pair<INDArray, INDArray> preOutput(boolean training, boolean forBackprop) {
- INDArray weights = getParam(ConvolutionParamInitializer.WEIGHT_KEY);
- INDArray bias = getParam(ConvolutionParamInitializer.BIAS_KEY);
- if (conf.isUseDropConnect() && training && conf.getLayer().getDropOut() > 0) {
- weights = Dropout.applyDropConnect(this, ConvolutionParamInitializer.WEIGHT_KEY);
- }
+ INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training);
+ INDArray weights = getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training);
//Input validation: expect rank 4 matrix
if (input.rank() != 4) {
@@ -451,20 +449,9 @@ public boolean isPretrainLayer() {
return false;
}
-
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray indArray) {
- throw new UnsupportedOperationException("Not supported " + layerId());
- }
-
@Override
public void fit(INDArray input) {}
- @Override
- public void merge(Layer layer, int batchSize) {
- throw new UnsupportedOperationException(layerId());
- }
-
@Override
public INDArray params() {
//C order flattening, to match the gradient flattening order
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java
index 384f982bf5fc..6fa510e53d43 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java
@@ -17,7 +17,6 @@
*/
package org.deeplearning4j.nn.layers.convolution;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
@@ -27,6 +26,7 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.primitives.Pair;
/**
* Zero padding 1D layer for convolutional neural networks.
@@ -53,6 +53,11 @@ public boolean isPretrainLayer() {
return false;
}
+ @Override
+ public void clearNoiseWeightParams() {
+ //No op
+ }
+
@Override
public Type type() {
return Type.RECURRENT;
@@ -68,11 +73,6 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
- @Override
- public INDArray activationMean() {
- throw new UnsupportedOperationException();
- }
-
@Override
public INDArray activate(boolean training) {
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPaddingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPaddingLayer.java
index d5aacb26ebec..8f51c9928b99 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPaddingLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPaddingLayer.java
@@ -1,6 +1,5 @@
package org.deeplearning4j.nn.layers.convolution;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
@@ -10,6 +9,7 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.primitives.Pair;
/**
* Zero padding layer for convolutional neural networks.
@@ -36,6 +36,11 @@ public boolean isPretrainLayer() {
return false;
}
+ @Override
+ public void clearNoiseWeightParams() {
+ //No op
+ }
+
@Override
public Type type() {
return Type.CONVOLUTIONAL;
@@ -52,11 +57,6 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
- @Override
- public INDArray activationMean() {
- throw new UnsupportedOperationException();
- }
-
@Override
public INDArray activate(boolean training) {
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling1DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling1DLayer.java
index 9ef57f3a48c3..9e46de835934 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling1DLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling1DLayer.java
@@ -1,10 +1,10 @@
package org.deeplearning4j.nn.layers.convolution.subsampling;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingHelper.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingHelper.java
index 97de84f818f3..2611d66bcd9a 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingHelper.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingHelper.java
@@ -17,11 +17,11 @@
*/
package org.deeplearning4j.nn.layers.convolution.subsampling;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/**
* Helper for the subsampling layer.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java
index 3570578fe3ab..58b063ee754b 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java
@@ -19,7 +19,6 @@
package org.deeplearning4j.nn.layers.convolution.subsampling;
import lombok.extern.slf4j.Slf4j;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.ConvolutionMode;
@@ -29,7 +28,6 @@
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.AbstractLayer;
import org.deeplearning4j.util.ConvolutionUtils;
-import org.deeplearning4j.util.Dropout;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.transforms.IsMax;
import org.nd4j.linalg.api.ops.impl.transforms.convolution.Pooling2D;
@@ -37,6 +35,7 @@
import org.nd4j.linalg.convolution.Convolution;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.util.ArrayUtil;
import java.util.Arrays;
@@ -246,8 +245,8 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
@Override
public INDArray activate(boolean training) {
- if (training && conf.getLayer().getDropOut() > 0) {
- Dropout.applyDropout(input, conf.getLayer().getDropOut());
+ if (training && !dropoutApplied && layerConf().getIDropout() != null) {
+ applyDropOutIfNecessary(true);
}
//Input validation: expect rank 4 matrix
@@ -337,27 +336,6 @@ public INDArray activate(boolean training) {
return output.reshape('c', miniBatch, inDepth, outH, outW);
}
- @Override
- public Gradient error(INDArray input) {
- throw new UnsupportedOperationException(layerId());
- }
-
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray indArray) {
- throw new UnsupportedOperationException(layerId());
- }
-
-
- @Override
- public void merge(Layer layer, int batchSize) {
- throw new UnsupportedOperationException(layerId());
- }
-
- @Override
- public INDArray activationMean() {
- return null;
- }
-
@Override
public Layer transpose() {
throw new UnsupportedOperationException(layerId());
@@ -373,6 +351,11 @@ public boolean isPretrainLayer() {
return false;
}
+ @Override
+ public void clearNoiseWeightParams() {
+ //no op
+ }
+
@Override
public void iterate(INDArray input) {
throw new UnsupportedOperationException(layerId());
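
The activate change above replaces the old static Dropout.applyDropout(input, p) call with the IDropout abstraction: the dropoutApplied flag guarantees the input is dropped out at most once per forward pass, and only when an IDropout instance is actually configured. For intuition only — this is not the IDropout implementation — the underlying inverted-dropout arithmetic looks like:

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

class InvertedDropoutSketch {
    // Illustrative sketch of inverted dropout with plain ND4J ops.
    static INDArray apply(INDArray input, double keepProb) {
        INDArray mask = Nd4j.rand(input.shape()).lt(keepProb); // Bernoulli(keepProb) keep-mask
        return input.muli(mask).divi(keepProb);                // rescale so E[output] == input
    }
}
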
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java
new file mode 100644
index 000000000000..3fd5cf6db62d
--- /dev/null
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java
@@ -0,0 +1,92 @@
+/*-
+ *
+ * * Copyright 2015 Skymind,Inc.
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+
+package org.deeplearning4j.nn.layers.convolution.upsampling;
+
+import lombok.extern.slf4j.Slf4j;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.layers.BaseUpsamplingLayer;
+import org.deeplearning4j.nn.gradient.Gradient;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
+
+
+/**
+ * 1D Upsampling layer.
+ *
+ * Used for upsampling a 1D convolution. Currently derived from the 2D version.
+ * For forward and backward pass we add a dummy dimension, apply the 2D version
+ * and strip the extra dimension again. Eventually, we will want to migrate to a
+ * proper 1D version without this overhead.
+ *
+ * @author Max Pumperla
+ */
+@Slf4j
+public class Upsampling1D extends Upsampling2D {
+
+
+ public Upsampling1D(NeuralNetConfiguration conf) {
+ super(conf);
+ }
+
+ public Upsampling1D(NeuralNetConfiguration conf, INDArray input) {
+ super(conf, input);
+ }
+
+
+ @Override
+ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
+
+ int size = ((BaseUpsamplingLayer) layerConf()).getSize();
+ epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1);
+ // we replicate the error term "size" times so that backprop works properly on it
+ epsilon = epsilon.repeat(3, size);
+
+ INDArray originalInput = input;
+ input = input.reshape(input.size(0), input.size(1), input.size(2), 1);
+
+ Pair<Gradient, INDArray> gradientEpsNext = super.backpropGradient(epsilon);
+ INDArray epsNext = gradientEpsNext.getSecond();
+ Gradient gradient = gradientEpsNext.getFirst();
+
+ epsNext = epsNext.slice(0, 3);
+ input = originalInput;
+
+ // Since we aggregate the gradient across "size" slices, we need to normalize afterwards.
+ return new Pair<>(gradient, epsNext.divi(size));
+ }
+
+ @Override
+ public INDArray preOutput(boolean training) {
+ return preOutput(training, false);
+ }
+
+ public INDArray preOutput(boolean training, boolean forBackprop) {
+ INDArray originalInput = input;
+ input = input.reshape(input.size(0), input.size(1), input.size(2), 1);
+
+ INDArray preOutput = super.preOutput(training, forBackprop);
+
+ input = originalInput;
+ preOutput = preOutput.slice(0, 3);
+
+ return preOutput;
+ }
+
+
+}
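
The dummy-dimension trick in this new class is easiest to follow with concrete shapes. A sketch, assuming NCW input of shape [miniBatch, channels, length] and size = 2:

import java.util.Arrays;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

class Upsampling1DShapeSketch {
    public static void main(String[] args) {
        // Forward: [mb=4, c=3, l=10] gains a dummy width dim, the 2D op then
        // produces [4, 3, 20, 2], and slice(0, 3) strips the width dim again.
        INDArray in = Nd4j.rand(new int[] {4, 3, 10});
        INDArray as4d = in.reshape(4, 3, 10, 1);
        System.out.println(Arrays.toString(as4d.shape())); // [4, 3, 10, 1]

        // Backward: downstream epsilon [4, 3, 20] gets the same reshape, then
        // is repeated 'size' times along dim 3 to match the 2D output shape.
        INDArray eps = Nd4j.rand(new int[] {4, 3, 20}).reshape(4, 3, 20, 1).repeat(3, 2);
        System.out.println(Arrays.toString(eps.shape()));  // [4, 3, 20, 2]
    }
}

Because each error element is replicated "size" times before the 2D backprop aggregates it, the resulting epsNext is "size" times too large, which is what the final divi(size) normalization undoes.
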
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java
index 20a6223e5a25..aaeb91128326 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java
@@ -23,11 +23,11 @@
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.layers.BaseUpsamplingLayer;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.layers.AbstractLayer;
-import org.deeplearning4j.util.Dropout;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.CustomOp;
@@ -83,7 +83,7 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
int inH = input.size(2);
int inW = input.size(3);
- int size = layerConf().getSize();
+ int size = ((BaseUpsamplingLayer) layerConf()).getSize();
INDArray outEpsilon = Nd4j.createUninitialized(miniBatch * inDepth * inH * inW);
INDArray reshapedEpsilon = outEpsilon.reshape('c', miniBatch, inDepth, inH, inW);
@@ -100,7 +100,6 @@ public Pair backpropGradient(INDArray epsilon) {
.build();
Nd4j.getExecutioner().exec(op);
-
return new Pair<>(gradient, reshapedEpsilon);
}
@@ -110,10 +109,7 @@ public INDArray preOutput(boolean training) {
}
public INDArray preOutput(boolean training, boolean forBackprop) {
-
- if (training && conf.getLayer().getDropOut() > 0) {
- Dropout.applyDropout(input, conf.getLayer().getDropOut());
- }
+ applyDropOutIfNecessary(training);
if (input.rank() != 4) {
throw new DL4JInvalidInputException("Got rank " + input.rank()
@@ -131,7 +127,7 @@ public INDArray preOutput(boolean training, boolean forBackprop) {
int inH = input.size(2);
int inW = input.size(3);
- int size = layerConf().getSize();
+ int size = ((BaseUpsamplingLayer) layerConf()).getSize();
int outH = inH * size;
int outW = inW * size;
@@ -152,10 +148,7 @@ public INDArray preOutput(boolean training, boolean forBackprop) {
@Override
public INDArray activate(boolean training) {
-
- if (training && conf.getLayer().getDropOut() > 0) {
- Dropout.applyDropout(input, conf.getLayer().getDropOut());
- }
+ applyDropOutIfNecessary(training);
if (cacheMode == null)
cacheMode = CacheMode.NONE;
@@ -173,27 +166,6 @@ public INDArray activate(boolean training) {
return z;
}
- @Override
- public Gradient error(INDArray input) {
- throw new UnsupportedOperationException(layerId());
- }
-
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray indArray) {
- throw new UnsupportedOperationException(layerId());
- }
-
-
- @Override
- public void merge(Layer layer, int batchSize) {
- throw new UnsupportedOperationException(layerId());
- }
-
- @Override
- public INDArray activationMean() {
- return null;
- }
-
@Override
public Layer transpose() {
throw new UnsupportedOperationException(layerId());
@@ -209,6 +181,11 @@ public boolean isPretrainLayer() {
return false;
}
+ @Override
+ public void clearNoiseWeightParams() {
+ //No op
+ }
+
@Override
public void iterate(INDArray input) {
throw new UnsupportedOperationException(layerId());
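
A note on the two casts above: Upsampling1D (added earlier in this diff) reuses this implementation with its own configuration class, so layerConf() can no longer be assumed to be the 2D conf type; getSize() is therefore resolved through the shared BaseUpsamplingLayer parent. The output-size arithmetic itself is unchanged — a worked sketch:

class UpsamplingSizeSketch {
    // Spatial dims scale by 'size': e.g. size = 2 maps [mb, c, 8, 8] -> [mb, c, 16, 16].
    static int[] outSize(int inH, int inW, int size) {
        return new int[] {inH * size, inW * size};
    }
}
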
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java
index 164ff2c5e55b..a9d233412099 100755
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java
@@ -18,12 +18,11 @@
package org.deeplearning4j.nn.layers.feedforward.autoencoder;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.layers.BasePretrainNetwork;
import org.deeplearning4j.nn.params.PretrainParamInitializer;
-import org.deeplearning4j.util.Dropout;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/**
* Autoencoder.
@@ -58,11 +57,8 @@ public Pair<INDArray, INDArray> sampleVisibleGivenHidden(INDArray h) {
// Encode
public INDArray encode(INDArray v, boolean training) {
- INDArray W = getParam(PretrainParamInitializer.WEIGHT_KEY);
- if (training && conf.isUseDropConnect() && conf.getLayer().getDropOut() > 0) {
- W = Dropout.applyDropConnect(this, PretrainParamInitializer.WEIGHT_KEY);
- }
- INDArray hBias = getParam(PretrainParamInitializer.BIAS_KEY);
+ INDArray W = getParamWithNoise(PretrainParamInitializer.WEIGHT_KEY, training);
+ INDArray hBias = getParamWithNoise(PretrainParamInitializer.BIAS_KEY, training);
INDArray preAct = v.mmul(W).addiRowVector(hBias);
//INDArray ret = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(conf.getLayer().getActivationFunction(), preAct));
@@ -73,8 +69,8 @@ public INDArray encode(INDArray v, boolean training) {
// Decode
public INDArray decode(INDArray y) {
- INDArray W = getParam(PretrainParamInitializer.WEIGHT_KEY);
- INDArray vBias = getParam(PretrainParamInitializer.VISIBLE_BIAS_KEY);
+ INDArray W = getParamWithNoise(PretrainParamInitializer.WEIGHT_KEY, true);
+ INDArray vBias = getParamWithNoise(PretrainParamInitializer.VISIBLE_BIAS_KEY, true);
INDArray preAct = y.mmul(W.transposei()).addiRowVector(vBias);
//return Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(conf.getLayer().getActivationFunction(), preAct));
return layerConf().getActivationFn().getActivation(preAct, true);
@@ -110,7 +106,7 @@ public INDArray activate() {
@Override
public void computeGradientAndScore() {
- INDArray W = getParam(PretrainParamInitializer.WEIGHT_KEY);
+ INDArray W = getParamWithNoise(PretrainParamInitializer.WEIGHT_KEY, true);
double corruptionLevel = layerConf().getCorruptionLevel();
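
For context, encode/decode here are tied-weight affine maps: encode computes f(v*W + hBias), and decode reuses the same W transposed, f(y*W^T + vBias) — note the pre-existing transposei() call mutates W in place. A self-contained sketch of the math, assuming a sigmoid activation in place of layerConf().getActivationFn():

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.ops.transforms.Transforms;

class TiedWeightAutoEncoderSketch {
    static INDArray encode(INDArray v, INDArray W, INDArray hBias) {
        return Transforms.sigmoid(v.mmul(W).addiRowVector(hBias), false);             // f(v*W + b_h)
    }
    static INDArray decode(INDArray y, INDArray W, INDArray vBias) {
        return Transforms.sigmoid(y.mmul(W.transpose()).addiRowVector(vBias), false); // f(y*W^T + b_v)
    }
}
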
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayer.java
index 174fc284d3fa..1248ae434d06 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayer.java
@@ -29,6 +29,7 @@
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
/**Embedding layer: feed-forward layer that expects single integers per example (class numbers, in range 0 to numClass-1)
* as input. This input has shape [numExamples,1] instead of [numExamples,numClasses] for the equivalent one-hot representation.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBM.java
index 02ea87c4b33c..74c51a8d3f2d 100755
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBM.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBM.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.layers.feedforward.rbm;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
@@ -27,11 +26,11 @@
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.deeplearning4j.nn.params.PretrainParamInitializer;
import org.deeplearning4j.optimize.api.TrainingListener;
-import org.deeplearning4j.util.Dropout;
import org.deeplearning4j.util.RBMUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.rng.distribution.Distribution;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import static org.nd4j.linalg.ops.transforms.Transforms.*;
@@ -309,12 +308,9 @@ public Pair<INDArray, INDArray> sampleVisibleGivenHidden(INDArray h) {
}
public INDArray preOutput(INDArray v, boolean training) {
- INDArray hBias = getParam(PretrainParamInitializer.BIAS_KEY);
- INDArray W = getParam(DefaultParamInitializer.WEIGHT_KEY);
- if (training && conf.isUseDropConnect() && conf.getLayer().getDropOut() > 0) {
- W = Dropout.applyDropConnect(this, DefaultParamInitializer.WEIGHT_KEY);
- }
- return v.mmul(W).addiRowVector(hBias);
+ INDArray weights = getParamWithNoise(PretrainParamInitializer.WEIGHT_KEY, training);
+ INDArray bias = getParamWithNoise(PretrainParamInitializer.BIAS_KEY, training);
+ return v.mmul(weights).addiRowVector(bias);
}
/**
@@ -422,8 +418,8 @@ public INDArray propDown(INDArray h) {
*/
@Override
public INDArray activate(boolean training) {
- if (training && conf.getLayer().getDropOut() > 0.0) {
- Dropout.applyDropout(input, conf.getLayer().getDropOut());
+ if (training && conf.getLayer().getIDropout() != null) {
+ applyDropOutIfNecessary(training);
}
//reconstructed: propUp ----> hidden propDown to transform
INDArray propUp = propUp(input, training);
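
The rewritten preOutput above is the standard RBM hidden pre-activation; for the Bernoulli case, propUp passes it through the statically imported sigmoid to get P(h = 1 | v). The parameter-key swap (DefaultParamInitializer to PretrainParamInitializer for the weight key) should be cosmetic, since PretrainParamInitializer inherits its weight key from the default initializer. A sketch of the math:

import org.nd4j.linalg.api.ndarray.INDArray;
import static org.nd4j.linalg.ops.transforms.Transforms.sigmoid;

class RbmPropUpSketch {
    static INDArray propUp(INDArray v, INDArray W, INDArray hBias) {
        INDArray preAct = v.mmul(W).addiRowVector(hBias); // v*W + hBias
        return sigmoid(preAct);                           // P(h = 1 | v)
    }
}
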
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java
index 6ff23b94f388..9725c2ea0826 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java
@@ -1,7 +1,6 @@
package org.deeplearning4j.nn.layers.normalization;
import lombok.extern.slf4j.Slf4j;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
@@ -18,6 +17,7 @@
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.Arrays;
@@ -80,16 +80,6 @@ public Type type() {
return Type.NORMALIZATION;
}
- @Override
- public Gradient error(INDArray input) {
- return null;
- }
-
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray indArray) {
- return null;
- }
-
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
INDArray nextEpsilon;
@@ -219,11 +209,6 @@ public Pair backpropGradient(INDArray epsilon) {
return new Pair<>(retGradient, nextEpsilon);
}
- @Override
- public void merge(Layer layer, int batchSize) {
- throw new UnsupportedOperationException(layerId());
- }
-
@Override
public void fit(INDArray data) {}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationHelper.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationHelper.java
index d388da5c50fe..b97e60be3662 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationHelper.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationHelper.java
@@ -17,9 +17,9 @@
*/
package org.deeplearning4j.nn.layers.normalization;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/**
* Helper for the batch normalization layer.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java
index 59640b2fd5df..cb1636690e00 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java
@@ -1,6 +1,5 @@
package org.deeplearning4j.nn.layers.normalization;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
@@ -11,6 +10,7 @@
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -188,20 +188,9 @@ public boolean isPretrainLayer() {
return false;
}
-
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray indArray) {
- throw new UnsupportedOperationException("Not supported - " + layerId());
- }
-
- @Override
- public void merge(Layer layer, int batchSize) {
- throw new UnsupportedOperationException(layerId());
- }
-
@Override
- public INDArray activationMean() {
- return activate(false);
+ public void clearNoiseWeightParams() {
+ //No op
}
@Override
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalizationHelper.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalizationHelper.java
index 0b6cc49378ab..c2bf9987e544 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalizationHelper.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalizationHelper.java
@@ -17,9 +17,9 @@
*/
package org.deeplearning4j.nn.layers.normalization;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
/**
* Helper for the local response normalization layer.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java
index da06606068c6..a820ee54e0a5 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java
@@ -16,7 +16,6 @@
import org.nd4j.linalg.activations.impl.ActivationSoftmax;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
-import org.nd4j.linalg.api.ops.LossFunction;
import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp;
import org.nd4j.linalg.api.ops.impl.transforms.IsMax;
import org.nd4j.linalg.api.ops.impl.transforms.Not;
@@ -325,11 +324,6 @@ private INDArray computeBackpropGradientAndScore(){
return epsOut;
}
- @Override
- public INDArray activationMean() {
- return activate();
- }
-
@Override
public INDArray activate(boolean training) {
//Essentially: just apply activation functions...
@@ -595,6 +589,11 @@ public boolean isPretrainLayer() {
return false;
}
+ @Override
+ public void clearNoiseWeightParams() {
+ //No op
+ }
+
/**
* Given the network output and a detection threshold (in range 0 to 1) determine the objects detected by
* the network.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java
index b1fecd76875a..c5edb84abc5f 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java
@@ -1,7 +1,6 @@
package org.deeplearning4j.nn.layers.pooling;
import org.apache.commons.lang3.ArrayUtils;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
@@ -16,6 +15,7 @@
import org.nd4j.linalg.api.ops.impl.transforms.IsMax;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
@@ -72,6 +72,11 @@ public boolean isPretrainLayer() {
return false;
}
+ @Override
+ public void clearNoiseWeightParams() {
+ //No op
+ }
+
@Override
public double calcL2(boolean backpropParamsOnly) {
return 0;
@@ -272,11 +277,6 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
return new Pair<>(retGradient, epsilonNd);
}
- @Override
- public INDArray activationMean() {
- throw new UnsupportedOperationException("Not supported");
- }
-
private INDArray epsilonHelperFullArray(INDArray inputArray, INDArray epsilon, int[] poolDim) {
//Broadcast: occurs on the remaining dimensions, after the pool dimensions have been removed.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java
index 64ef154d7760..9208b24aaced 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java
@@ -19,7 +19,6 @@
package org.deeplearning4j.nn.layers.recurrent;
import lombok.extern.slf4j.Slf4j;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.CacheMode;
@@ -28,6 +27,7 @@
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.params.GravesBidirectionalLSTMParamInitializer;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Map;
@@ -71,11 +71,6 @@ public Gradient gradient() {
throw new UnsupportedOperationException("Not supported " + layerId());
}
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray activation) {
- throw new UnsupportedOperationException("Not supported " + layerId());
- }
-
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
return backpropGradientHelper(epsilon, false, -1);
@@ -265,11 +260,6 @@ private FwdPassReturn activateHelperDirectional(final boolean training, final IN
}
}
- @Override
- public INDArray activationMean() {
- return activate();
- }
-
@Override
public Type type() {
return Type.RECURRENT;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java
index 25f6222aca97..7778addcb749 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java
@@ -19,7 +19,6 @@
package org.deeplearning4j.nn.layers.recurrent;
import lombok.extern.slf4j.Slf4j;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.CacheMode;
@@ -28,6 +27,7 @@
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.params.GravesLSTMParamInitializer;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Map;
@@ -64,11 +64,6 @@ public Gradient gradient() {
+ layerId());
}
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray activation) {
- throw new UnsupportedOperationException("Not supported " + layerId());
- }
-
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
return backpropGradientHelper(epsilon, false, -1);
@@ -83,8 +78,8 @@ public Pair<Gradient, INDArray> tbpttBackpropGradient(INDArray epsilon, int tbpt
private Pair<Gradient, INDArray> backpropGradientHelper(final INDArray epsilon, final boolean truncatedBPTT,
final int tbpttBackwardLength) {
- final INDArray inputWeights = getParam(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
- final INDArray recurrentWeights = getParam(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG]
+ final INDArray inputWeights = getParamWithNoise(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY, true);
+ final INDArray recurrentWeights = getParamWithNoise(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY, true); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG]
//First: Do forward pass to get gate activations, zs etc.
FwdPassReturn fwdPass;
@@ -99,10 +94,14 @@ private Pair<Gradient, INDArray> backpropGradientHelper(final INDArray epsilon,
}
- return LSTMHelpers.backpropGradientHelper(this.conf, this.layerConf().getGateActivationFn(), this.input,
+ Pair<Gradient, INDArray> p = LSTMHelpers.backpropGradientHelper(this.conf, this.layerConf().getGateActivationFn(), this.input,
recurrentWeights, inputWeights, epsilon, truncatedBPTT, tbpttBackwardLength, fwdPass, true,
GravesLSTMParamInitializer.INPUT_WEIGHT_KEY, GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY,
GravesLSTMParamInitializer.BIAS_KEY, gradientViews, maskArray, true, null);
+
+ weightNoiseParams.clear();
+
+ return p;
}
@@ -152,9 +151,9 @@ private FwdPassReturn activateHelper(final boolean training, final INDArray prev
return ret;
}
- final INDArray recurrentWeights = getParam(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG]
- final INDArray inputWeights = getParam(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY); //Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg]
- final INDArray biases = getParam(GravesLSTMParamInitializer.BIAS_KEY); //by row: IFOG //Shape: [4,hiddenLayerSize]; order: [bi,bf,bo,bg]^T
+ final INDArray recurrentWeights = getParamWithNoise(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY, training); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG]
+ final INDArray inputWeights = getParamWithNoise(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY, training); //Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg]
+ final INDArray biases = getParamWithNoise(GravesLSTMParamInitializer.BIAS_KEY, training); //by row: IFOG //Shape: [4,hiddenLayerSize]; order: [bi,bf,bo,bg]^T
FwdPassReturn fwd = LSTMHelpers.activateHelper(this, this.conf, this.layerConf().getGateActivationFn(),
this.input, recurrentWeights, inputWeights, biases, training, prevOutputActivations,
@@ -170,11 +169,6 @@ private FwdPassReturn activateHelper(final boolean training, final INDArray prev
return fwd;
}
- @Override
- public INDArray activationMean() {
- return activate();
- }
-
@Override
public Type type() {
return Type.RECURRENT;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTM.java
index 4f8817652096..548073cce377 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTM.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTM.java
@@ -19,7 +19,6 @@
package org.deeplearning4j.nn.layers.recurrent;
import lombok.extern.slf4j.Slf4j;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.CacheMode;
@@ -28,6 +27,7 @@
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.params.LSTMParamInitializer;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Map;
@@ -80,11 +80,6 @@ public Gradient gradient() {
+ layerId());
}
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray activation) {
- throw new UnsupportedOperationException("Not supported " + layerId());
- }
-
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
return backpropGradientHelper(epsilon, false, -1);
@@ -99,8 +94,8 @@ public Pair<Gradient, INDArray> tbpttBackpropGradient(INDArray epsilon, int tbpt
private Pair<Gradient, INDArray> backpropGradientHelper(final INDArray epsilon, final boolean truncatedBPTT,
final int tbpttBackwardLength) {
- final INDArray inputWeights = getParam(LSTMParamInitializer.INPUT_WEIGHT_KEY);
- final INDArray recurrentWeights = getParam(LSTMParamInitializer.RECURRENT_WEIGHT_KEY); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG]
+ final INDArray inputWeights = getParamWithNoise(LSTMParamInitializer.INPUT_WEIGHT_KEY, true);
+ final INDArray recurrentWeights = getParamWithNoise(LSTMParamInitializer.RECURRENT_WEIGHT_KEY, true); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG]
//First: Do forward pass to get gate activations, zs etc.
FwdPassReturn fwdPass;
@@ -115,10 +110,13 @@ private Pair<Gradient, INDArray> backpropGradientHelper(final INDArray epsilon,
}
- return LSTMHelpers.backpropGradientHelper(this.conf, this.layerConf().getGateActivationFn(), this.input,
+ Pair<Gradient, INDArray> p = LSTMHelpers.backpropGradientHelper(this.conf, this.layerConf().getGateActivationFn(), this.input,
recurrentWeights, inputWeights, epsilon, truncatedBPTT, tbpttBackwardLength, fwdPass, true,
LSTMParamInitializer.INPUT_WEIGHT_KEY, LSTMParamInitializer.RECURRENT_WEIGHT_KEY,
LSTMParamInitializer.BIAS_KEY, gradientViews, null, false, helper);
+
+ weightNoiseParams.clear();
+ return p;
}
@@ -168,9 +166,9 @@ private FwdPassReturn activateHelper(final boolean training, final INDArray prev
return ret;
}
- final INDArray recurrentWeights = getParam(LSTMParamInitializer.RECURRENT_WEIGHT_KEY); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG]
- final INDArray inputWeights = getParam(LSTMParamInitializer.INPUT_WEIGHT_KEY); //Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg]
- final INDArray biases = getParam(LSTMParamInitializer.BIAS_KEY); //by row: IFOG //Shape: [4,hiddenLayerSize]; order: [bi,bf,bo,bg]^T
+ final INDArray recurrentWeights = getParamWithNoise(LSTMParamInitializer.RECURRENT_WEIGHT_KEY, training); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG]
+ final INDArray inputWeights = getParamWithNoise(LSTMParamInitializer.INPUT_WEIGHT_KEY, training); //Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg]
+ final INDArray biases = getParamWithNoise(LSTMParamInitializer.BIAS_KEY, training); //by row: IFOG //Shape: [4,hiddenLayerSize]; order: [bi,bf,bo,bg]^T
FwdPassReturn fwd = LSTMHelpers.activateHelper(this, this.conf, this.layerConf().getGateActivationFn(),
this.input, recurrentWeights, inputWeights, biases, training, prevOutputActivations,
@@ -185,11 +183,6 @@ private FwdPassReturn activateHelper(final boolean training, final INDArray prev
return fwd;
}
- @Override
- public INDArray activationMean() {
- return activate();
- }
-
@Override
public Type type() {
return Type.RECURRENT;
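
In both LSTM variants the backprop helper now fetches weights with training = true. That is deliberate: with weight noise configured, the first training-time fetch samples the noise and caches it in weightNoiseParams, so backprop reuses the identical noisy arrays the forward pass saw, and the clear() after the gradient computation forces a fresh sample on the next iteration. A toy, self-contained model of that caching contract (inferred from the getParamWithNoise implementation later in this diff; not the real API):

import java.util.HashMap;
import java.util.Map;

class WeightNoiseCacheSketch {
    private final Map<String, double[]> cache = new HashMap<>();

    double[] getParamWithNoise(String key, boolean training) {
        if (!training) return sample(key);                 // inference: no caching
        return cache.computeIfAbsent(key, k -> sample(k)); // 2nd training call is a cache hit
    }

    void clearNoiseWeightParams() { cache.clear(); }       // called once backprop is done

    private double[] sample(String key) {
        return new double[] {Math.random()};               // stand-in for noisy weights
    }
}
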
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelper.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelper.java
index 1ec1f78a9bda..3dc95acf1967 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelper.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelper.java
@@ -17,12 +17,12 @@
*/
package org.deeplearning4j.nn.layers.recurrent;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Map;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java
index c96b86ab8fbe..eaaf533eea39 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java
@@ -1,7 +1,6 @@
package org.deeplearning4j.nn.layers.recurrent;
import lombok.extern.slf4j.Slf4j;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
@@ -14,7 +13,6 @@
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.layers.BaseLayer;
-import org.deeplearning4j.util.Dropout;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.activations.impl.ActivationSigmoid;
import org.nd4j.linalg.api.blas.Level1;
@@ -25,6 +23,7 @@
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
import java.util.HashMap;
@@ -99,14 +98,7 @@ static public FwdPassReturn activateHelper(final BaseLayer layer, final NeuralNe
}
- INDArray recurrentWeightsIFOG = recurrentWeights
- .get(NDArrayIndex.all(), NDArrayIndex.interval(0, 4 * hiddenLayerSize)).dup('f');
-
-
- //Apply dropconnect to input (not recurrent) weights only:
- if (conf.isUseDropConnect() && training && conf.getLayer().getDropOut() > 0) {
- inputWeights = Dropout.applyDropConnect(layer, inputWeightKey);
- }
+ INDArray recurrentWeightsIFOG = recurrentWeights.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 4 * hiddenLayerSize)).dup('f');
INDArray wFFTranspose = null;
INDArray wOOTranspose = null;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java
index 78e02467e0f0..dc736521f5c9 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java
@@ -17,20 +17,19 @@
*/
package org.deeplearning4j.nn.layers.recurrent;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.BaseOutputLayer;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
-import org.deeplearning4j.util.Dropout;
import org.deeplearning4j.util.TimeSeriesUtils;
import org.nd4j.linalg.activations.impl.ActivationSoftmax;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.transforms.SoftMax;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.ILossFunction;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
@@ -64,6 +63,9 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
this.input = inputTemp;
INDArray epsilon2d = gradAndEpsilonNext.getSecond();
INDArray epsilon3d = TimeSeriesUtils.reshape2dTo3d(epsilon2d, input.size(0));
+
+ weightNoiseParams.clear();
+
return new Pair<>(gradAndEpsilonNext.getFirst(), epsilon3d);
}
@@ -142,8 +144,7 @@ public INDArray output(boolean training) {
return TimeSeriesUtils.reshape2dTo3d(out2d, input.size(0));
}
- if (training)
- applyDropOutIfNecessary(training);
+ applyDropOutIfNecessary(training);
INDArray origInput = input;
this.input = TimeSeriesUtils.reshape3dTo2d(input);
INDArray out = super.activate(true);
@@ -159,11 +160,8 @@ public INDArray activate(boolean training) {
if (input.rank() != 3)
throw new UnsupportedOperationException(
"Input must be rank 3. Got input with rank " + input.rank() + " " + layerId());
- INDArray b = getParam(DefaultParamInitializer.BIAS_KEY);
- INDArray W = getParam(DefaultParamInitializer.WEIGHT_KEY);
- if (conf.isUseDropConnect() && training) {
- W = Dropout.applyDropConnect(this, DefaultParamInitializer.WEIGHT_KEY);
- }
+ INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training);
+ INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training);
INDArray input2d = TimeSeriesUtils.reshape3dTo2d(input);
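
RnnOutputLayer handles time series by folding time into the minibatch: [miniBatch, nOut, timeSeriesLength] is flattened to [miniBatch * timeSeriesLength, nOut] for the standard dense output-layer math, then restored — the epsilon2d/epsilon3d pair in the first hunk is the same round trip for gradients. A shape sketch using the two TimeSeriesUtils calls visible above:

import java.util.Arrays;
import org.deeplearning4j.util.TimeSeriesUtils;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

class RnnReshapeSketch {
    public static void main(String[] args) {
        INDArray act3d = Nd4j.rand(new int[] {4, 8, 5});          // [mb=4, nOut=8, T=5]
        INDArray act2d = TimeSeriesUtils.reshape3dTo2d(act3d);    // [mb*T=20, nOut=8]
        INDArray back = TimeSeriesUtils.reshape2dTo3d(act2d, 4);  // [4, 8, 5] again
        System.out.println(Arrays.toString(back.shape()));
    }
}
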
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java
index 4699c3510dd0..a2e7f13ee371 100755
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.nn.layers.training;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
@@ -28,6 +27,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.ILossFunction;
+import org.nd4j.linalg.primitives.Pair;
/**
@@ -168,9 +168,14 @@ public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
INDArray centersForExamples = labels.mmul(centers);
INDArray dLcdai = input.sub(centersForExamples);
- INDArray epsilonNext = params.get(CenterLossParamInitializer.WEIGHT_KEY).mmul(delta.transpose()).transpose();
+ INDArray w = getParamWithNoise(CenterLossParamInitializer.WEIGHT_KEY, true);
+
+ INDArray epsilonNext = w.mmul(delta.transpose()).transpose();
double lambda = layerConf().getLambda();
epsilonNext.addi(dLcdai.muli(lambda)); // add center loss here
+
+ weightNoiseParams.clear();
+
return new Pair<>(pair.getFirst(), epsilonNext);
}
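
Spelling out the gradient this hunk assembles: with delta the output-layer error, W the output weights (fetched post-noise with training = true so they match the forward pass), x_i the layer input, and c_{y_i} the running center of example i's class, the epsilon sent upstream is

    epsilon_i = delta_i * W^T + lambda * (x_i - c_{y_i})

i.e. the usual dense-layer backprop term plus a pull toward the class center; weightNoiseParams.clear() then discards the cached noisy W before the next iteration.
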
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java
index 08350bebd177..186f7b5096b9 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java
@@ -3,10 +3,10 @@
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.Getter;
-import org.deeplearning4j.nn.api.layers.LayerConstraint;
-import org.nd4j.linalg.primitives.Pair;
+import lombok.Setter;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
+import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.variational.CompositeReconstructionDistribution;
@@ -27,6 +27,7 @@
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.ILossFunction;
import org.nd4j.linalg.ops.transforms.Transforms;
+import org.nd4j.linalg.primitives.Pair;
import java.util.*;
@@ -74,6 +75,13 @@ public class VariationalAutoencoder implements Layer {
protected boolean zeroedPretrainParamGradients = false;
+ protected Map<String, INDArray> weightNoiseParams = new HashMap<>();
+
+ @Getter @Setter
+ protected int iterationCount;
+ @Getter @Setter
+ protected int epochCount;
+
public VariationalAutoencoder(NeuralNetConfiguration conf) {
this.conf = conf;
@@ -132,6 +140,30 @@ public double score() {
return score;
}
+ protected INDArray getParamWithNoise(String param, boolean training){
+ INDArray p;
+ if(layerConf().getWeightNoise() != null){
+ if(training && weightNoiseParams.size() > 0 ){
+ //Re-use these weights for both forward pass and backprop - don't want to use 2 different params here
+ //These should be cleared during backprop
+ return weightNoiseParams.get(param);
+ } else {
+ try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) {
+ p = layerConf().getWeightNoise().getParameter(this, param, getIterationCount(), getEpochCount(), training);
+ }
+ }
+
+ if(training){
+ //Store for re-use in backprop
+ weightNoiseParams.put(param, p);
+ }
+ } else {
+ return getParam(param);
+ }
+
+ return p;
+ }
+
@Override
public void computeGradientAndScore() {
//Forward pass through the encoder and mean for P(Z|X)
@@ -139,8 +171,8 @@ public void computeGradientAndScore() {
IActivation afn = layerConf().getActivationFn();
//Forward pass through logStd^2 for P(Z|X)
- INDArray pzxLogStd2W = params.get(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_W);
- INDArray pzxLogStd2b = params.get(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_B);
+ INDArray pzxLogStd2W = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_W, true);
+ INDArray pzxLogStd2b = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_B, true);
INDArray pzxLogStd2Pre = fwd.encoderActivations[fwd.encoderActivations.length - 1].mmul(pzxLogStd2W)
.addiRowVector(pzxLogStd2b);
@@ -178,8 +210,8 @@ public void computeGradientAndScore() {
String wKey = "d" + i + WEIGHT_KEY_SUFFIX;
String bKey = "d" + i + BIAS_KEY_SUFFIX;
- INDArray weights = params.get(wKey);
- INDArray bias = params.get(bKey);
+ INDArray weights = getParamWithNoise(wKey, true);
+ INDArray bias = getParamWithNoise(bKey, true);
current = current.mmul(weights).addiRowVector(bias);
decoderPreOut[i] = current.dup();
@@ -187,8 +219,8 @@ public void computeGradientAndScore() {
decoderActivations[i] = current;
}
- INDArray pxzw = params.get(VariationalAutoencoderParamInitializer.PXZ_W);
- INDArray pxzb = params.get(VariationalAutoencoderParamInitializer.PXZ_B);
+ INDArray pxzw = getParamWithNoise(VariationalAutoencoderParamInitializer.PXZ_W, true);
+ INDArray pxzb = getParamWithNoise(VariationalAutoencoderParamInitializer.PXZ_B, true);
if (l == 0) {
//Need to add other component of score, in addition to negative log probability
@@ -258,7 +290,7 @@ public void computeGradientAndScore() {
INDArray currentDelta = afn.backprop(decoderPreOut[i], epsilon).getFirst(); //TODO activation functions with params
- INDArray weights = params.get(wKey);
+ INDArray weights = getParamWithNoise(wKey, true);
INDArray dLdW = gradientViews.get(wKey);
INDArray dLdB = gradientViews.get(bKey);
@@ -287,8 +319,8 @@ public void computeGradientAndScore() {
}
//Do backprop through p(z|x)
- INDArray eZXMeanW = params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W);
- INDArray eZXLogStdev2W = params.get(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_W);
+ INDArray eZXMeanW = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, true);
+ INDArray eZXLogStdev2W = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_W, true);
INDArray dLdz = epsilon;
//If we were maximizing the equation in Kingma and Welling, this would be a .sub(meanZ). Here: we are minimizing the negative instead
@@ -344,7 +376,7 @@ public void computeGradientAndScore() {
String wKey = "e" + i + WEIGHT_KEY_SUFFIX;
String bKey = "e" + i + BIAS_KEY_SUFFIX;
- INDArray weights = params.get(wKey);
+ INDArray weights = getParamWithNoise(wKey, true);
INDArray dLdW = gradientViews.get(wKey);
INDArray dLdB = gradientViews.get(bKey);
@@ -418,6 +450,8 @@ public void computeGradientAndScore() {
g.put(VariationalAutoencoderParamInitializer.PXZ_B,
gradientMap.get(VariationalAutoencoderParamInitializer.PXZ_B));
+ weightNoiseParams.clear();
+
this.gradient = gradient;
}
@@ -481,11 +515,6 @@ public void setBackpropGradientsViewArray(INDArray gradients) {
this.gradientViews = conf.getLayer().initializer().getGradientsFromFlattened(conf, gradients);
}
- @Override
- public void applyLearningRateScoreDecay() {
-
- }
-
@Override
public void fit(INDArray data) {
this.setInput(data);
@@ -631,21 +660,6 @@ public Type type() {
return Type.FEED_FORWARD;
}
- @Override
- public Gradient error(INDArray input) {
- throw new UnsupportedOperationException("Not supported " + layerId());
- }
-
- @Override
- public INDArray derivativeActivation(INDArray input) {
- throw new UnsupportedOperationException("Not supported " + layerId());
- }
-
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray indArray) {
- throw new UnsupportedOperationException("Not supported " + layerId());
- }
-
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
if (!zeroedPretrainParamGradients) {
@@ -663,7 +677,7 @@ public Pair backpropGradient(INDArray epsilon) {
INDArray currentDelta = pzxActivationFn.backprop(fwd.pzxMeanPreOut, epsilon).getFirst();
//Finally, calculate mean value:
- INDArray meanW = params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W);
+ INDArray meanW = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, true);
INDArray dLdMeanW = gradientViews.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W); //f order
INDArray lastEncoderActivation = fwd.encoderActivations[fwd.encoderActivations.length - 1];
Nd4j.gemm(lastEncoderActivation, currentDelta, dLdMeanW, true, false, 1.0, 0.0);
@@ -682,7 +696,7 @@ public Pair backpropGradient(INDArray epsilon) {
String wKey = "e" + i + WEIGHT_KEY_SUFFIX;
String bKey = "e" + i + BIAS_KEY_SUFFIX;
- INDArray weights = params.get(wKey);
+ INDArray weights = getParamWithNoise(wKey, true);
INDArray dLdW = gradientViews.get(wKey);
INDArray dLdB = gradientViews.get(bKey);
@@ -709,16 +723,6 @@ public Pair backpropGradient(INDArray epsilon) {
return new Pair<>(gradient, epsilon);
}
- @Override
- public void merge(Layer layer, int batchSize) {
- throw new UnsupportedOperationException("Not supported " + layerId());
- }
-
- @Override
- public INDArray activationMean() {
- throw new UnsupportedOperationException("Not supported " + layerId());
- }
-
@Override
public INDArray preOutput(INDArray x) {
return preOutput(x, TrainingMode.TEST);
@@ -765,8 +769,8 @@ private VAEFwdHelper doForward(boolean training, boolean forBackprop) {
String wKey = "e" + i + WEIGHT_KEY_SUFFIX;
String bKey = "e" + i + BIAS_KEY_SUFFIX;
- INDArray weights = params.get(wKey);
- INDArray bias = params.get(bKey);
+ INDArray weights = getParamWithNoise(wKey, training);
+ INDArray bias = getParamWithNoise(bKey, training);
current = current.mmul(weights).addiRowVector(bias);
if (forBackprop) {
@@ -777,8 +781,8 @@ private VAEFwdHelper doForward(boolean training, boolean forBackprop) {
}
//Finally, calculate mean value:
- INDArray mW = params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W);
- INDArray mB = params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_B);
+ INDArray mW = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, training);
+ INDArray mB = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_B, training);
INDArray pzxMean = current.mmul(mW).addiRowVector(mB);
@@ -921,6 +925,11 @@ public boolean isPretrainLayer() {
return true;
}
+ @Override
+ public void clearNoiseWeightParams() {
+ weightNoiseParams.clear();
+ }
+
@Override
public Pair<INDArray, MaskState> feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState,
int minibatchSize) {
@@ -996,8 +1005,8 @@ public INDArray reconstructionLogProbability(INDArray data, int numSamples) {
IActivation afn = layerConf().getActivationFn();
//Forward pass through logStd^2 for P(Z|X)
- INDArray pzxLogStd2W = params.get(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_W);
- INDArray pzxLogStd2b = params.get(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_B);
+ INDArray pzxLogStd2W = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_W, false);
+ INDArray pzxLogStd2b = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_B, false);
INDArray meanZ = fwd.pzxMeanPreOut;
INDArray logStdev2Z = fwd.encoderActivations[fwd.encoderActivations.length - 1].mmul(pzxLogStd2W)
@@ -1011,8 +1020,8 @@ public INDArray reconstructionLogProbability(INDArray data, int numSamples) {
int minibatch = input.size(0);
int size = fwd.pzxMeanPreOut.size(1);
- INDArray pxzw = params.get(VariationalAutoencoderParamInitializer.PXZ_W);
- INDArray pxzb = params.get(VariationalAutoencoderParamInitializer.PXZ_B);
+ INDArray pxzw = getParamWithNoise(VariationalAutoencoderParamInitializer.PXZ_W, false);
+ INDArray pxzb = getParamWithNoise(VariationalAutoencoderParamInitializer.PXZ_B, false);
INDArray[] decoderWeights = new INDArray[decoderLayerSizes.length];
INDArray[] decoderBiases = new INDArray[decoderLayerSizes.length];
@@ -1020,8 +1029,8 @@ public INDArray reconstructionLogProbability(INDArray data, int numSamples) {
for (int i = 0; i < decoderLayerSizes.length; i++) {
String wKey = "d" + i + WEIGHT_KEY_SUFFIX;
String bKey = "d" + i + BIAS_KEY_SUFFIX;
- decoderWeights[i] = params.get(wKey);
- decoderBiases[i] = params.get(bKey);
+ decoderWeights[i] = getParamWithNoise(wKey, false);
+ decoderBiases[i] = getParamWithNoise(bKey, false);
}
INDArray sumReconstructionNegLogProbability = null;
@@ -1079,9 +1088,9 @@ public INDArray generateRandomGivenZ(INDArray latentSpaceValues) {
}
private INDArray decodeGivenLatentSpaceValues(INDArray latentSpaceValues) {
- if (latentSpaceValues.size(1) != params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W).size(1)) {
+ if (latentSpaceValues.size(1) != getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, true).size(1)) {
throw new IllegalArgumentException("Invalid latent space values: expected size "
- + params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W).size(1)
+ + getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, false).size(1)
+ ", got size (dimension 1) = " + latentSpaceValues.size(1) + " " + layerId());
}
@@ -1094,14 +1103,14 @@ private INDArray decodeGivenLatentSpaceValues(INDArray latentSpaceValues) {
for (int i = 0; i < nDecoderLayers; i++) {
String wKey = "d" + i + WEIGHT_KEY_SUFFIX;
String bKey = "d" + i + BIAS_KEY_SUFFIX;
- INDArray w = params.get(wKey);
- INDArray b = params.get(bKey);
+ INDArray w = getParamWithNoise(wKey, false);
+ INDArray b = getParamWithNoise(bKey, false);
currentActivations = currentActivations.mmul(w).addiRowVector(b);
afn.getActivation(currentActivations, false);
}
- INDArray pxzw = params.get(VariationalAutoencoderParamInitializer.PXZ_W);
- INDArray pxzb = params.get(VariationalAutoencoderParamInitializer.PXZ_B);
+ INDArray pxzw = getParamWithNoise(VariationalAutoencoderParamInitializer.PXZ_W, false);
+ INDArray pxzb = getParamWithNoise(VariationalAutoencoderParamInitializer.PXZ_B, false);
return currentActivations.mmul(pxzw).addiRowVector(pxzb);
}
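
Unlike the layers above, VariationalAutoencoder implements Layer directly rather than extending BaseLayer, so this diff duplicates the weight-noise plumbing locally: the weightNoiseParams map, the iteration/epoch counters the noise schedule needs, and a private getParamWithNoise. The abstraction it programs against can be read off the getParameter(...) call; a reconstruction (interface and method names inferred from this usage, treat them as assumptions):

import java.io.Serializable;
import org.deeplearning4j.nn.api.Layer;
import org.nd4j.linalg.api.ndarray.INDArray;

// Inferred shape of the weight-noise abstraction used by getParamWithNoise above:
public interface IWeightNoise extends Serializable {
    // Returns the (possibly noise-perturbed) value of the named parameter for
    // the given layer at this iteration/epoch; train toggles noise application.
    INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train);
}
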
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java
index ee6f840ac4b4..4581f000c4a0 100755
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java
@@ -21,10 +21,9 @@
import lombok.Getter;
import lombok.Setter;
+import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
-import org.deeplearning4j.nn.conf.inputs.InputType;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.datasets.iterator.AsyncDataSetIterator;
import org.deeplearning4j.datasets.iterator.MultiDataSetWrapperIterator;
import org.deeplearning4j.eval.*;
@@ -35,6 +34,7 @@
import org.deeplearning4j.nn.api.layers.IOutputLayer;
import org.deeplearning4j.nn.api.layers.RecurrentLayer;
import org.deeplearning4j.nn.conf.*;
+import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.BaseLayer;
import org.deeplearning4j.nn.conf.layers.FeedForwardLayer;
import org.deeplearning4j.nn.gradient.DefaultGradient;
@@ -68,10 +68,9 @@
import org.nd4j.linalg.heartbeat.utils.TaskUtils;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.memory.abstracts.DummyWorkspace;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.primitives.Triple;
import org.nd4j.linalg.util.FeatureUtil;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.*;
@@ -87,8 +86,8 @@
*
* @author Adam Gibson
*/
+@Slf4j
public class MultiLayerNetwork implements Serializable, Classifier, Layer, NeuralNetwork {
- private static final Logger log = LoggerFactory.getLogger(MultiLayerNetwork.class);
//the hidden neural network layers (including output layer)
protected Layer[] layers;
@@ -644,6 +643,8 @@ public void init(INDArray parameters, boolean cloneParametersArray) {
solver.initOptimizer();
}
}
+
+ synchronizeIterEpochCounts();
}
/**
@@ -735,18 +736,6 @@ public INDArray activate(int layer, INDArray input) {
return getLayer(layer).activate(input);
}
- @Override
- public INDArray activationMean() {
- //TODO determine how to pass back all activationMean for MLN
- throw new UnsupportedOperationException();
- // List<INDArray> avgActivations = new ArrayList<>();
- //
- // for( Layer layer: getLayers() ){
- // avgActivations.add(layer.activationMean());
- // }
- // return Nd4j.toFlattened(avgActivations);
- }
-
/**
* Sets the input and labels from this dataset
*
@@ -1502,6 +1491,7 @@ public void updateRnnStateWithTBPTTState() {
/** Equivalent to backprop(), but calculates gradient for truncated BPTT instead. */
protected void truncatedBPTTGradient() {
+ synchronizeIterEpochCounts();
if (flattenedGradients == null) {
initGradientsView();
}
@@ -2224,6 +2214,8 @@ public void computeGradientAndScore() {
}
truncatedBPTTGradient();
} else {
+ synchronizeIterEpochCounts();
+
//First: do a feed-forward through the network
//Note that we don't actually need to do the full forward pass through the output layer right now; but we do
// need the input to the output layer to be set (such that backprop can be done)
@@ -2258,6 +2250,9 @@ public void computeGradientAndScore() {
}
}
}
+
+ //Clear the post-noise (weight noise / dropconnect) parameter copies on the output layer
+ getOutputLayer().clearNoiseWeightParams();
}
@Override
@@ -2284,42 +2279,6 @@ public void applyConstraints(int iteration, int epoch) {
}
}
- /**
- * Averages the given logistic regression
- * from a mini batch in to this one
- *
- * @param layer the logistic regression to average in to this one
- * @param batchSize the batch size
- * @deprecated Not supported and not used
- */
- @Override
- @Deprecated
- public void merge(Layer layer, int batchSize) {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Deprecated: Merges this network with the other one.
- *
- * @param network the network to merge with
- * @param batchSize the batch size (number of training examples)
- * to average by
- * @deprecated As of 0.7.3 - Feb 2017. No longer used; parameter averaging is performed via alternative means/methods
- */
- @Deprecated
- public void merge(MultiLayerNetwork network, int batchSize) {
- if (network.layers.length != layers.length)
- throw new IllegalArgumentException("Unable to merge networks that are not of equal length");
- for (int i = 0; i < getnLayers(); i++) {
- Layer n = layers[i];
- Layer otherNetwork = network.layers[i];
- n.merge(otherNetwork, batchSize);
-
- }
-
- getOutputLayer().merge(network.getOutputLayer(), batchSize);
- }
-
/**
* Note that if input isn't null
@@ -2364,18 +2323,6 @@ public void setParameters(INDArray params) {
setParams(params);
}
- @Override
- public void applyLearningRateScoreDecay() {
- for (Layer layer : layers) {
- if (!layer.conf().getLearningRateByParam().isEmpty()) {
- for (Map.Entry<String, Double> lrPair : layer.conf().getLearningRateByParam().entrySet()) {
- layer.conf().setLearningRateByParam(lrPair.getKey(),
- lrPair.getValue() * (layer.conf().getLrPolicyDecayRate() + Nd4j.EPS_THRESHOLD));
- }
- }
- }
- }
-
public NeuralNetConfiguration getDefaultConfiguration() {
return defaultConfiguration;
}
@@ -2447,6 +2394,13 @@ public boolean isPretrainLayer() {
return false;
}
+ @Override
+ public void clearNoiseWeightParams() {
+ for(Layer l : layers){
+ l.clearNoiseWeightParams();
+ }
+ }
+
@Override
public Pair<INDArray, MaskState> feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState,
int minibatchSize) {
@@ -2489,26 +2443,11 @@ public Pair feedForwardMaskArray(INDArray maskArray, MaskSt
//==========
//Layer methods
- @Override
- public Gradient error(INDArray errorSignal) {
- throw new UnsupportedOperationException();
- }
-
@Override
public Type type() {
return Type.MULTILAYER;
}
- @Override
- public INDArray derivativeActivation(INDArray input) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public Gradient calcGradient(Gradient layerError, INDArray activation) {
- throw new UnsupportedOperationException();
- }
-
@Override
public INDArray preOutput(INDArray x) {
INDArray lastLayerActivation = x;
@@ -2562,6 +2501,26 @@ public int getIndex() {
return layerIndex;
}
+ @Override
+ public int getIterationCount() {
+ return getLayerWiseConfigurations().getIterationCount();
+ }
+
+ @Override
+ public int getEpochCount() {
+ return getLayerWiseConfigurations().getEpochCount();
+ }
+
+ @Override
+ public void setIterationCount(int iterationCount) {
+ getLayerWiseConfigurations().setIterationCount(iterationCount);
+ }
+
+ @Override
+ public void setEpochCount(int epochCount) {
+ getLayerWiseConfigurations().setEpochCount(epochCount);
+ }
+
@Override
public double calcL2(boolean backpropParamsOnly) {
double l2 = 0.0;
@@ -3067,7 +3026,7 @@ public String summary(InputType inputType) {
paramShape = "";
in = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNIn());
out = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNOut());
- Set<String> paraNames = currentLayer.conf().getLearningRateByParam().keySet();
+ Set<String> paraNames = currentLayer.paramTable().keySet();
for (String aP : paraNames) {
String paramS = ArrayUtils.toString(currentLayer.paramTable().get(aP).shape());
paramShape += aP + ":" + paramS + ", ";
@@ -3124,6 +3083,17 @@ public void incrementEpochCount(){
layerWiseConfigurations.setEpochCount(layerWiseConfigurations.getEpochCount() + 1);
}
+
+ protected void synchronizeIterEpochCounts(){
+ //TODO: this is necessary for some schedules - but the redundant values are a little ugly...
+ int currIter = getIterationCount();
+ int currEpoch = getEpochCount();
+ for(Layer l : layers){
+ l.setIterationCount(currIter);
+ l.setEpochCount(currEpoch);
+ }
+ }
+
/**
* Indicates whether some other object is "equal to" this one.
*
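The counter plumbing added to MultiLayerNetwork above (getIterationCount/getEpochCount delegating to the configuration, plus synchronizeIterEpochCounts() pushing both values into every layer before gradients are computed) exists so that schedule-based updaters see consistent counters. A usage sketch built only on methods visible in this diff; 'net' and 'trainIter' are assumed to be an initialized MultiLayerNetwork and DataSetIterator:

    // Keep the epoch counter meaningful for epoch-based schedules.
    for (int epoch = 0; epoch < 10; epoch++) {
        trainIter.reset();
        net.fit(trainIter);          // advances the iteration count internally
        net.incrementEpochCount();   // advances the epoch count consumed by schedules
    }
    int iterationsSoFar = net.getIterationCount();
    int epochsSoFar = net.getEpochCount();
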
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java
index 9233e69fd680..fc398609c0be 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java
@@ -9,7 +9,10 @@
import org.nd4j.linalg.api.rng.distribution.Distribution;
import org.nd4j.linalg.indexing.NDArrayIndex;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
/**
* Parameter initializer for the Variational Autoencoder model.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java
index cabece573850..5b2793739db6 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java
@@ -8,8 +8,11 @@
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.*;
import org.deeplearning4j.nn.conf.distribution.Distribution;
+import org.deeplearning4j.nn.conf.dropout.Dropout;
+import org.deeplearning4j.nn.conf.dropout.IDropout;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.stepfunctions.StepFunction;
+import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.IActivation;
@@ -20,7 +23,6 @@
import java.io.IOException;
import java.util.List;
-import java.util.Map;
/**
* Created by Alex on 21/02/2017.
@@ -37,46 +39,23 @@ public class FineTuneConfiguration {
protected WeightInit weightInit;
protected Double biasInit;
protected Distribution dist;
- protected Double learningRate;
- protected Double biasLearningRate;
- protected Map<Integer, Double> learningRateSchedule;
- protected Double lrScoreBasedDecay;
protected Double l1;
protected Double l2;
protected Double l1Bias;
protected Double l2Bias;
- protected Double dropOut;
- @Deprecated
- protected Updater updater;
+ protected IDropout dropout;
+ protected IWeightNoise weightNoise;
protected IUpdater iUpdater;
- @Deprecated
- protected Double momentum;
- @Deprecated
- protected Map<Integer, Double> momentumSchedule;
- @Deprecated
- protected Double epsilon;
- @Deprecated
- protected Double rho;
- @Deprecated
- protected Double rmsDecay;
- @Deprecated
- protected Double adamMeanDecay;
- @Deprecated
- protected Double adamVarDecay;
+ protected IUpdater biasUpdater;
protected Boolean miniBatch;
protected Integer numIterations;
protected Integer maxNumLineSearchIterations;
protected Long seed;
protected OptimizationAlgorithm optimizationAlgo;
protected StepFunction stepFunction;
- protected Boolean useDropConnect;
protected Boolean minimize;
protected GradientNormalization gradientNormalization;
protected Double gradientNormalizationThreshold;
- protected LearningRatePolicy learningRatePolicy;
- protected Double lrPolicyDecayRate;
- protected Double lrPolicySteps;
- protected Double lrPolicyPower;
protected ConvolutionMode convolutionMode;
protected List<LayerConstraint> constraints;
protected Boolean hasBiasConstraints;
@@ -105,19 +84,15 @@ public Builder seed(long seed) {
return this;
}
- /**
- * @deprecated No longer used
- */
- @Deprecated
- public Builder regularization(boolean regularization) {
- return this;
- }
-
public Builder iterations(int iterations) {
this.numIterations = iterations;
return this;
}
+ public Builder dropOut(double dropout){
+ return dropout(new Dropout(dropout));
+ }
+
public Builder activation(Activation activation) {
this.activationFn = activation.getActivationFunction();
return this;
@@ -127,8 +102,8 @@ public Builder updater(IUpdater updater) {
return iUpdater(updater);
}
+ @Deprecated
public Builder updater(Updater updater) {
- this.updater = updater;
return updater(updater.getIUpdaterWithDefaultConfig());
}
}
@@ -147,13 +122,12 @@ public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) {
WeightInit origWeightInit = null;
if (l != null) {
- if (dropOut != null)
- l.setDropOut(dropOut);
+ if (dropout != null)
+ l.setIDropout(dropout);
}
if (l != null && l instanceof BaseLayer) {
BaseLayer bl = (BaseLayer) l;
- originalUpdater = bl.getUpdater();
origWeightInit = bl.getWeightInit();
if (activationFn != null)
bl.setActivationFn(activationFn);
@@ -163,18 +137,6 @@ public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) {
bl.setBiasInit(biasInit);
if (dist != null)
bl.setDist(dist);
- if (learningRate != null) {
- //usually the same learning rate is applied to both bias and weights
- //so always overwrite the learning rate to both?
- bl.setLearningRate(learningRate);
- bl.setBiasLearningRate(learningRate);
-
- }
- if (biasLearningRate != null)
- bl.setBiasLearningRate(biasLearningRate);
- if (learningRateSchedule != null)
- bl.setLearningRateSchedule(learningRateSchedule);
- // if(lrScoreBasedDecay != null)
if (l1 != null)
bl.setL1(l1);
if (l2 != null)
@@ -183,28 +145,19 @@ public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) {
bl.setL1Bias(l1Bias);
if (l2Bias != null)
bl.setL2Bias(l2Bias);
- if (updater != null)
- bl.setUpdater(updater);
- if (iUpdater != null)
- bl.setIUpdater(iUpdater);
- if (momentum != null)
- bl.setMomentum(momentum);
- if (momentumSchedule != null)
- bl.setMomentum(momentum);
- if (epsilon != null)
- bl.setEpsilon(epsilon);
- if (rho != null)
- bl.setRho(rho);
- if (rmsDecay != null)
- bl.setRmsDecay(rmsDecay);
- if (adamMeanDecay != null)
- bl.setAdamMeanDecay(adamMeanDecay);
- if (adamVarDecay != null)
- bl.setAdamVarDecay(adamVarDecay);
if (gradientNormalization != null)
bl.setGradientNormalization(gradientNormalization);
if (gradientNormalizationThreshold != null)
bl.setGradientNormalizationThreshold(gradientNormalizationThreshold);
+ if (iUpdater != null){
+ bl.setIUpdater(iUpdater);
+ }
+ if (biasUpdater != null){
+ bl.setBiasUpdater(biasUpdater);
+ }
+ if (weightNoise != null){
+ bl.setWeightNoise(weightNoise);
+ }
}
if (miniBatch != null)
nnc.setMiniBatch(miniBatch);
@@ -218,16 +171,8 @@ public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) {
nnc.setOptimizationAlgo(optimizationAlgo);
if (stepFunction != null)
nnc.setStepFunction(stepFunction);
- if (useDropConnect != null)
- nnc.setUseDropConnect(useDropConnect);
if (minimize != null)
nnc.setMinimize(minimize);
- if (learningRatePolicy != null)
- nnc.setLearningRatePolicy(learningRatePolicy);
- if (lrPolicySteps != null)
- nnc.setLrPolicySteps(lrPolicySteps);
- if (lrPolicyPower != null)
- nnc.setLrPolicyPower(lrPolicyPower);
if (convolutionMode != null && l instanceof ConvolutionLayer) {
((ConvolutionLayer) l).setConvolutionMode(convolutionMode);
@@ -236,47 +181,6 @@ public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) {
((SubsamplingLayer) l).setConvolutionMode(convolutionMode);
}
- //Check the updater config. If we change updaters, we want to remove the old config to avoid warnings
- if (l != null && l instanceof BaseLayer && updater != null && originalUpdater != null
- && updater != originalUpdater) {
- BaseLayer bl = (BaseLayer) l;
- switch (originalUpdater) {
- case ADAM:
- case ADAMAX:
- if (adamMeanDecay == null)
- bl.setAdamMeanDecay(Double.NaN);
- if (adamVarDecay == null)
- bl.setAdamVarDecay(Double.NaN);
- break;
- case ADADELTA:
- if (rho == null)
- bl.setRho(Double.NaN);
- if (epsilon == null)
- bl.setEpsilon(Double.NaN);
- break;
- case NESTEROVS:
- if (momentum == null)
- bl.setMomentum(Double.NaN);
- if (momentumSchedule == null)
- bl.setMomentumSchedule(null);
- if (epsilon == null)
- bl.setEpsilon(Double.NaN);
- break;
- case ADAGRAD:
- if (epsilon == null)
- bl.setEpsilon(Double.NaN);
- break;
- case RMSPROP:
- if (rmsDecay == null)
- bl.setRmsDecay(Double.NaN);
- if (epsilon == null)
- bl.setEpsilon(Double.NaN);
- break;
-
- //Other cases: no changes required
- }
- }
-
//Check weight init. Remove dist if originally was DISTRIBUTION, and isn't now -> remove no longer needed distribution
if (l != null && l instanceof BaseLayer && origWeightInit == WeightInit.DISTRIBUTION && weightInit != null
&& weightInit != WeightInit.DISTRIBUTION) {
@@ -285,12 +189,7 @@ public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) {
//Perform validation. This also sets the defaults for updaters. For example, Updater.RMSProp -> set rmsDecay
if (l != null) {
- LayerValidation.updaterValidation(l.getLayerName(), l, learningRate, momentum, momentumSchedule,
- adamMeanDecay, adamVarDecay, rho, rmsDecay, epsilon);
-
- boolean useDropCon = (useDropConnect == null ? nnc.isUseDropConnect() : useDropConnect);
- LayerValidation.generalValidation(l.getLayerName(), l, useDropCon, dropOut, l2, l2Bias, l1, l1Bias,
- dist, constraints, null, null);
+ LayerValidation.generalValidation(l.getLayerName(), l, dropout, l2, l2Bias, l1, l1Bias, dist, constraints, null, null);
}
//Also: update the LR, L1 and L2 maps, based on current config (which might be different to original config)
@@ -337,16 +236,6 @@ public NeuralNetConfiguration.Builder appliedNeuralNetConfigurationBuilder() {
confBuilder.setBiasInit(biasInit);
if (dist != null)
confBuilder.setDist(dist);
- if (learningRate != null) {
- //usually the same learning rate is applied to both bias and weights
- //HOWEVER: this is set elsewhere. in the NNC, we only want to override the normal LR
- confBuilder.setLearningRate(learningRate);
- }
- if (biasLearningRate != null)
- confBuilder.setBiasLearningRate(biasLearningRate);
- if (learningRateSchedule != null)
- confBuilder.setLearningRateSchedule(learningRateSchedule);
- // if(lrScoreBasedDecay != null)
if (l1 != null)
confBuilder.setL1(l1);
if (l2 != null)
@@ -355,26 +244,12 @@ public NeuralNetConfiguration.Builder appliedNeuralNetConfigurationBuilder() {
confBuilder.setL1Bias(l1Bias);
if (l2Bias != null)
confBuilder.setL2Bias(l2Bias);
- if (dropOut != null)
- confBuilder.setDropOut(dropOut);
+ if (dropout != null)
+ confBuilder.setIdropOut(dropout);
if (iUpdater != null)
confBuilder.updater(iUpdater);
- if (updater != null)
- confBuilder.setUpdater(updater);
- if (momentum != null)
- confBuilder.setMomentum(momentum);
- if (momentumSchedule != null)
- confBuilder.setMomentum(momentum);
- if (epsilon != null)
- confBuilder.setEpsilon(epsilon);
- if (rho != null)
- confBuilder.setRho(rho);
- if (rmsDecay != null)
- confBuilder.setRmsDecay(rmsDecay);
- if (adamMeanDecay != null)
- confBuilder.setAdamMeanDecay(adamMeanDecay);
- if (adamVarDecay != null)
- confBuilder.setAdamVarDecay(adamVarDecay);
+ if(biasUpdater != null)
+ confBuilder.biasUpdater(biasUpdater);
if (miniBatch != null)
confBuilder.setMiniBatch(miniBatch);
if (numIterations != null)
@@ -387,21 +262,12 @@ public NeuralNetConfiguration.Builder appliedNeuralNetConfigurationBuilder() {
confBuilder.setOptimizationAlgo(optimizationAlgo);
if (stepFunction != null)
confBuilder.setStepFunction(stepFunction);
- if (useDropConnect != null)
- confBuilder.setUseDropConnect(useDropConnect);
if (minimize != null)
confBuilder.setMinimize(minimize);
if (gradientNormalization != null)
confBuilder.setGradientNormalization(gradientNormalization);
if (gradientNormalizationThreshold != null)
confBuilder.setGradientNormalizationThreshold(gradientNormalizationThreshold);
- if (learningRatePolicy != null)
- confBuilder.setLearningRatePolicy(learningRatePolicy);
- if (lrPolicySteps != null)
- confBuilder.setLrPolicySteps(lrPolicySteps);
- if (lrPolicyPower != null)
- confBuilder.setLrPolicyPower(lrPolicyPower);
-
return confBuilder;
}
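
With the per-hyperparameter Double fields removed, fine-tuning overrides are now expressed as whole objects: an IUpdater (which carries the learning rate and any schedule), an IDropout, and an IWeightNoise. A sketch of the resulting builder usage; biasUpdater(...) is an assumed builder method matching the new field, while updater(IUpdater) and dropOut(double) are visible in this diff:

    import org.deeplearning4j.nn.transferlearning.FineTuneConfiguration;
    import org.nd4j.linalg.learning.config.Adam;
    import org.nd4j.linalg.learning.config.Sgd;

    FineTuneConfiguration ftc = new FineTuneConfiguration.Builder()
            .updater(new Adam(1e-4))     // replaces learningRate(...) plus updater(Updater.ADAM)
            .biasUpdater(new Sgd(1e-3))  // assumed: separate updater for bias parameters
            .dropOut(0.5)                // wrapped into the new IDropout-based Dropout
            .build();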
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java
index 12b985780720..76a7113a79f2 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java
@@ -295,12 +295,10 @@ public MultiLayerNetwork build() {
origNNC.variables(false).add(s);
origNNC.getL1ByParam().put(s, 0.0);
origNNC.getL2ByParam().put(s, 0.0);
- origNNC.getLearningRateByParam().put(s, 0.0);
layerNNC.variables(false).add(s);
layerNNC.getL1ByParam().put(s, 0.0);
layerNNC.getL2ByParam().put(s, 0.0);
- layerNNC.getLearningRateByParam().put(s, 0.0);
}
}
@@ -817,7 +815,6 @@ public ComputationGraph build() {
newNNC.variables(false).add(s);
newNNC.getL1ByParam().put(s, 0.0);
newNNC.getL2ByParam().put(s, 0.0);
- newNNC.getLearningRateByParam().put(s, 0.0);
}
//We also need to place the layer in the CompGraph Layer[] (replacing the old one)
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java
index 1dc99154086c..fdeac82739c1 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java
@@ -15,9 +15,7 @@
import org.nd4j.linalg.api.ops.DynamicCustomOp;
import org.nd4j.linalg.api.ops.impl.accum.Norm2;
import org.nd4j.linalg.factory.Nd4j;
-import org.nd4j.linalg.indexing.BooleanIndexing;
import org.nd4j.linalg.indexing.NDArrayIndex;
-import org.nd4j.linalg.indexing.conditions.Conditions;
import java.util.ArrayList;
import java.util.HashMap;
@@ -78,7 +76,7 @@ public BaseMultiLayerUpdater(T network, INDArray updaterState) {
for (int j = 0; j < variables.size(); j++) {
String var = variables.get(j);
int paramSizeThisVariable = layerParamTable.get(var).length();
- int updaterStateSizeThisVariable = (int) layers[i].conf().getLayer().getIUpdaterByParam(var)
+ int updaterStateSizeThisVariable = (int) layers[i].conf().getLayer().getUpdaterByParam(var)
.stateSize(paramSizeThisVariable);
INDArray gradientViewSubset = null;
@@ -207,8 +205,8 @@ public INDArray getStateViewArray() {
}
@Override
- public void update(Layer layer, Gradient gradient, int iteration, int batchSize) {
- update(gradient, iteration, batchSize);
+ public void update(Layer layer, Gradient gradient, int iteration, int epoch, int batchSize) {
+ update(gradient, iteration, epoch, batchSize);
}
/**
@@ -222,7 +220,7 @@ public void update(Layer layer, Gradient gradient, int iteration, int batchSize)
* @param iteration The current iteration (i.e., number of parameter updates so far)
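+ * @param epoch The current epoch (i.e., number of completed passes over the training data)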
* @param batchSize The current minibatch size (number of examples)
*/
- public void update(Gradient gradient, int iteration, int batchSize) {
+ public void update(Gradient gradient, int iteration, int epoch, int batchSize) {
//First: check if gradient is standard or external...
//In a MultiLayerNetwork, the INDArray returned by .gradient() is always the standard full view array
@@ -277,19 +275,19 @@ public void update(Gradient gradient, int iteration, int batchSize) {
.getAndActivateWorkspace(ComputationGraph.workspaceFeedForward)) {
if (isExternal) {
//RL4J etc type case: calculate gradients in 1 net, update them in another
- ub.updateExternalGradient(iteration, gradient.gradient(), getParams());
+ ub.updateExternalGradient(iteration, epoch, gradient.gradient(), getParams());
} else {
//Standard case
- ub.update(iteration);
+ ub.update(iteration, epoch);
}
}
} else {
if (isExternal) {
//RL4J etc type case: calculate gradients in 1 net, update them in another
- ub.updateExternalGradient(iteration, gradient.gradient(), getParams());
+ ub.updateExternalGradient(iteration, epoch, gradient.gradient(), getParams());
} else {
//Standard case
- ub.update(iteration);
+ ub.update(iteration, epoch);
}
}
}
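
The epoch argument added above is threaded from Updater.update(...) down to GradientUpdater.applyUpdater(view, iteration, epoch), which lets an updater's configuration evaluate epoch-based schedules. A sketch of such an updater; the schedule classes and constructors are assumed from ND4J's schedule package, not shown in this diff:

    import org.nd4j.linalg.learning.config.Nesterovs;
    import org.nd4j.linalg.schedule.ScheduleType;
    import org.nd4j.linalg.schedule.StepSchedule;

    // LR starts at 0.01 and is halved every 10 epochs; without the epoch argument,
    // only ITERATION-typed schedules could be evaluated at update time.
    Nesterovs updater = new Nesterovs(
            new StepSchedule(ScheduleType.EPOCH, 0.01, 0.5, 10), 0.9);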
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java
index 9ef2491e5c5d..6f17f3acaa2d 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java
@@ -3,14 +3,12 @@
import lombok.AllArgsConstructor;
import lombok.Data;
import org.deeplearning4j.nn.api.Layer;
-import org.deeplearning4j.nn.conf.LearningRatePolicy;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.BaseLayer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.learning.GradientUpdater;
-import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.ops.transforms.Transforms;
import java.util.ArrayList;
@@ -73,7 +71,7 @@ public void init() {
if (gradientUpdater == null) {
ParamState varState = layersAndVariablesInBlock.get(0);
String varName = varState.getParamName();
- gradientUpdater = varState.getLayer().conf().getLayer().getIUpdaterByParam(varName).instantiate(updaterView,
+ gradientUpdater = varState.getLayer().conf().getLayer().getUpdaterByParam(varName).instantiate(updaterView,
updaterViewRequiresInitialization); //UpdaterUtils.getGradientUpdater(varState.getLayer(), varState.getParamName());
}
}
@@ -103,17 +101,17 @@ public GradientUpdater getGradientUpdater() {
*
* @param iteration The current iteration (i.e., total number of parameter updates so far)
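+ * @param epoch The current epoch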
*/
- public void update(int iteration) {
- update(iteration, false, gradientView, null);
+ public void update(int iteration, int epoch) {
+ update(iteration, epoch, false, gradientView, null);
}
- public void updateExternalGradient(int iteration, INDArray fullNetworkGradientView,
+ public void updateExternalGradient(int iteration, int epoch, INDArray fullNetworkGradientView,
INDArray fullNetworkParamsArray) {
//Extract the relevant subset from the external network
- update(iteration, true, fullNetworkGradientView, fullNetworkParamsArray);
+ update(iteration, epoch, true, fullNetworkGradientView, fullNetworkParamsArray);
}
- private void update(int iteration, boolean externalGradient, INDArray fullNetworkGradientView,
+ private void update(int iteration, int epoch, boolean externalGradient, INDArray fullNetworkGradientView,
INDArray fullNetworkParamsArray) {
//Initialize the updater, if necessary
if (gradientUpdater == null) {
@@ -138,14 +136,9 @@ private void update(int iteration, boolean externalGradient, INDArray fullNetwor
//No params for this layer
return;
}
- BaseLayer baseLayer = (BaseLayer) l0.conf().getLayer();
- LearningRatePolicy lrPolicy = l0.conf().getLearningRatePolicy();
- if (lrPolicy != LearningRatePolicy.None || baseLayer.getIUpdater() instanceof Nesterovs) {
- applyLrDecayPolicy(lrPolicy, iteration);
- }
//Apply the updater itself
- gradientUpdater.applyUpdater(blockGradViewArray, iteration);
+ gradientUpdater.applyUpdater(blockGradViewArray, iteration, epoch);
//Post apply: l1 and l2 by params
for (ParamState p : layersAndVariablesInBlock) {
@@ -190,88 +183,4 @@ public void postApply(Layer layer, String paramName, INDArray gradientView, INDA
gradientView.addi(Transforms.sign(paramsView, true).muli(conf.getL1ByParam(paramName)));
}
}
-
- /**
- * Apply learning rate decay, based on the configuration
- *
- * @param decay Learning rate schedule enumeration
- * @param iteration Current iteration
- */
- public void applyLrDecayPolicy(LearningRatePolicy decay, int iteration) {
- Layer layer = layersAndVariablesInBlock.get(0).getLayer();
- String variable = layersAndVariablesInBlock.get(0).getParamName();
-
- NeuralNetConfiguration conf = layer.conf();
- double decayRate = layer.conf().getLrPolicyDecayRate();
- double lr = conf.getLearningRateByParam(variable);
-
- if (!(conf.getLayer() instanceof BaseLayer)) {
- //No params
- return;
- }
-
- BaseLayer baseLayer = (BaseLayer) conf.getLayer();
-
- double newLr;
- switch (decay) {
- case Exponential:
- newLr = lr * Math.pow(decayRate, iteration);
- break;
- case Inverse:
- newLr = lr / Math.pow((1 + decayRate * iteration), conf.getLrPolicyPower());
- break;
- case Step:
- newLr = lr * Math.pow(decayRate, Math.floor(iteration / conf.getLrPolicySteps()));
- break;
- case TorchStep:
- if (iteration > 1 && conf.getLrPolicySteps() % iteration == 0) {
- newLr = lr * decayRate;
- } else {
- newLr = lr;
- }
- break;
- case Poly:
- newLr = lr * Math.pow((1 - ((double) iteration) / conf.getNumIterations()), conf.getLrPolicyPower());
- break;
- case Sigmoid:
- newLr = lr / (1 + Math.exp(-decayRate * (iteration - conf.getLrPolicySteps())));
- break;
- case Schedule:
- if (baseLayer.getLearningRateSchedule().containsKey(iteration)) {
- newLr = baseLayer.getLearningRateSchedule().get(iteration);
- } else {
- newLr = lr;
- }
- break;
- case None:
- case Score:
- newLr = lr;
- break;
- default:
- throw new RuntimeException("Unknown Learning rate decay value: " + decay);
- }
-
- //Handle momentum schedules. Given the new updater design, this change is purely cosmetic
- double newMomentum = 0.0;
- if (baseLayer.getIUpdater() instanceof Nesterovs) {
- if (baseLayer.getMomentumSchedule() != null && baseLayer.getMomentumSchedule().containsKey(iteration)) {
- newMomentum = baseLayer.getMomentumSchedule().get(iteration);
- } else {
- newMomentum = baseLayer.getMomentum();
- }
- }
-
- //Need to set the LR for *all* variables in the Updater block. All variables (by definition of being in the
- // same block) share the same LR schedule
- for (ParamState vs : layersAndVariablesInBlock) {
- vs.getLayer().conf().setLearningRateByParam(vs.getParamName(), newLr);
- if (((BaseLayer) layer.conf().getLayer()).getIUpdater() instanceof Nesterovs) {
- ((BaseLayer) vs.getLayer().conf().getLayer()).setMomentum(newMomentum);
- }
- }
-
- //Apply the new LR according to the schedule.
- //Note: momentum schedules are applied internally in the Nesterov config object applySchedules method
- gradientUpdater.getConfig().applySchedules(iteration, newLr);
- }
}
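
The deleted applyLrDecayPolicy(...) switch is subsumed by ISchedule implementations evaluated inside each updater's own configuration, so learning-rate decay no longer mutates the per-parameter LR maps at update time. As one mapping, the old Exponential policy (newLr = lr * decayRate^iteration) corresponds to an exponential schedule; the class name and constructor below are assumptions based on ND4J's schedule package:

    import org.nd4j.linalg.learning.config.Sgd;
    import org.nd4j.linalg.schedule.ExponentialSchedule;
    import org.nd4j.linalg.schedule.ScheduleType;

    // Equivalent of LearningRatePolicy.Exponential with lr = 0.1, decayRate = 0.99:
    // value(i) = 0.1 * 0.99^i, computed inside the updater rather than via
    // UpdaterBlock.applyLrDecayPolicy(...).
    Sgd sgd = new Sgd(new ExponentialSchedule(ScheduleType.ITERATION, 0.1, 0.99));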
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterUtils.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterUtils.java
index a4764bbf3881..9008ea54d9cc 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterUtils.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterUtils.java
@@ -1,12 +1,8 @@
package org.deeplearning4j.nn.updater;
import org.deeplearning4j.nn.api.Layer;
-import org.deeplearning4j.nn.conf.LearningRatePolicy;
-import org.deeplearning4j.nn.conf.layers.BaseLayer;
import org.nd4j.linalg.learning.config.IUpdater;
-import java.util.Objects;
-
/**
* Created by Alex on 14/04/2017.
*/
@@ -16,19 +12,16 @@ public class UpdaterUtils {
public static boolean updaterConfigurationsEquals(Layer layer1, String param1, Layer layer2, String param2) {
org.deeplearning4j.nn.conf.layers.Layer l1 = layer1.conf().getLayer();
org.deeplearning4j.nn.conf.layers.Layer l2 = layer2.conf().getLayer();
- IUpdater u1 = l1.getIUpdaterByParam(param1);
- IUpdater u2 = l2.getIUpdaterByParam(param2);
- if (!u1.equals(u2)) {
- //Different updaters or different config
- return false;
- }
+ IUpdater u1 = l1.getUpdaterByParam(param1);
+ IUpdater u2 = l2.getUpdaterByParam(param2);
+
//For updaters to be equal (and hence combinable), we require that:
- //(a) The updater-specific configurations are equal (inc. LR)
- //(b) The learning rate *schedules* are equal
- //(c) If one or more of the params are pretrainable params, they are in the same layer
+ //(a) The updater-specific configurations are equal (including the LR and any LR/momentum schedules)
+ //(b) If one or more of the params are pretrainable params, they are in the same layer
// This last point is necessary as we don't want to modify the pretrain gradient/updater state during
// backprop, or modify the pretrain gradient/updater state of one layer while training another
- if (!lrSchedulesEqual(layer1, param1, layer2, param2)) {
+ if (!u1.equals(u2)) {
+ //Different updaters or different config
return false;
}
@@ -43,61 +36,4 @@ public static boolean updaterConfigurationsEquals(Layer layer1, String param1, L
return true;
}
-
- public static boolean lrSchedulesEqual(Layer layer1, String param1, Layer layer2, String param2) {
-
- LearningRatePolicy lp1 = layer1.conf().getLearningRatePolicy();
- LearningRatePolicy lp2 = layer2.conf().getLearningRatePolicy();
-
- if (lp1 != lp2) {
- return false;
- }
-
- double lr1 = layer1.conf().getLearningRateByParam(param1);
- double lr2 = layer2.conf().getLearningRateByParam(param2);
- if (lr1 != lr2) {
- return false;
- }
-
- double dr1 = layer1.conf().getLrPolicyDecayRate();
- double dr2 = layer2.conf().getLrPolicyDecayRate();
-
- boolean lrConfigEqual;
- switch (lp1) {
- case None:
- lrConfigEqual = true;
- break;
- case Exponential:
- lrConfigEqual = dr1 == dr2;
- break;
- case Inverse:
- lrConfigEqual = dr1 == dr2 && layer1.conf().getLrPolicyPower() == layer2.conf().getLrPolicyPower();
- break;
- case Poly:
- lrConfigEqual = layer1.conf().getLrPolicyPower() == layer2.conf().getLrPolicyPower();
- break;
- case Sigmoid:
- lrConfigEqual = dr1 == dr2 && layer1.conf().getLrPolicySteps() == layer2.conf().getLrPolicySteps();
- break;
- case Step:
- lrConfigEqual = dr1 == dr2 && layer1.conf().getLrPolicySteps() == layer2.conf().getLrPolicySteps();
- break;
- case TorchStep:
- lrConfigEqual = layer1.conf().getLrPolicyPower() == layer2.conf().getLrPolicyPower();
- break;
- case Schedule:
- BaseLayer bl1 = (BaseLayer) layer1.conf().getLayer();
- BaseLayer bl2 = (BaseLayer) layer2.conf().getLayer();
- lrConfigEqual = Objects.equals(bl1.getLearningRateSchedule(), bl2.getLearningRateSchedule());
- break;
- case Score:
- //TODO - might be ok sometimes??
- lrConfigEqual = false;
- break;
- default:
- throw new UnsupportedOperationException("Unknown learning rate schedule: " + lp1);
- }
-
- return lrConfigEqual;
- }
}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java
index 58b5b69c28be..5062308b83eb 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.optimize.api;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Model;
import org.deeplearning4j.nn.api.Updater;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
@@ -26,6 +25,7 @@
import org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater;
import org.deeplearning4j.optimize.solvers.accumulation.GradientsAccumulator;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.io.Serializable;
import java.util.Collection;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java
index b04d71e8a202..d34b11e0622f 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java
@@ -1,8 +1,8 @@
package org.deeplearning4j.optimize.listeners;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Model;
import org.deeplearning4j.optimize.api.IterationListener;
+import org.nd4j.linalg.primitives.Pair;
import java.io.File;
import java.io.FileOutputStream;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java
index d49269f1809f..f909e80993fb 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java
@@ -19,13 +19,11 @@
package org.deeplearning4j.optimize.solvers;
import lombok.Getter;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.exception.InvalidStepException;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.Model;
import org.deeplearning4j.nn.api.Updater;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
-import org.deeplearning4j.nn.conf.LearningRatePolicy;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -42,6 +40,7 @@
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -276,10 +275,6 @@ public boolean checkTerminalConditions(INDArray gradient, double oldScore, doubl
if (condition.terminate(score, oldScore, new Object[] {gradient})) {
log.debug("Hit termination condition on iteration {}: score={}, oldScore={}, condition={}", i, score,
oldScore, condition);
- if (condition instanceof EpsTermination && conf.getLayer() != null
- && conf.getLearningRatePolicy() == LearningRatePolicy.Score) {
- model.applyLearningRateScoreDecay();
- }
return true;
}
}
@@ -323,7 +318,7 @@ public void updateGradientAccordingToParams(Gradient gradient, Model model, int
computationGraphUpdater = new ComputationGraphUpdater(graph);
}
}
- computationGraphUpdater.update(gradient, getIterationCount(model), batchSize);
+ computationGraphUpdater.update(gradient, getIterationCount(model), getEpochCount(model), batchSize);
} else {
if (updater == null) {
try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) {
@@ -332,7 +327,7 @@ public void updateGradientAccordingToParams(Gradient gradient, Model model, int
}
Layer layer = (Layer) model;
- updater.update(layer, gradient, getIterationCount(model), batchSize);
+ updater.update(layer, gradient, getIterationCount(model), getEpochCount(model), batchSize);
}
}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java
index fc1da60008a0..0d279279173e 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java
@@ -18,7 +18,6 @@
package org.deeplearning4j.optimize.solvers;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Model;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -27,6 +26,7 @@
import org.deeplearning4j.optimize.api.TerminationCondition;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Collection;
import java.util.Iterator;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java
index 0a08882cec6d..fa9e7885053c 100644
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java
@@ -19,7 +19,6 @@
package org.deeplearning4j.optimize.solvers;
import lombok.extern.slf4j.Slf4j;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Model;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.Gradient;
@@ -29,6 +28,7 @@
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Collection;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Dropout.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Dropout.java
deleted file mode 100644
index 54c95507600b..000000000000
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Dropout.java
+++ /dev/null
@@ -1,50 +0,0 @@
-package org.deeplearning4j.util;
-
-import org.deeplearning4j.nn.api.Layer;
-import org.nd4j.linalg.api.ndarray.INDArray;
-import org.nd4j.linalg.api.ops.impl.transforms.LegacyDropOut;
-import org.nd4j.linalg.api.ops.impl.transforms.LegacyDropOutInverted;
-import org.nd4j.linalg.api.ops.random.impl.DropOut;
-import org.nd4j.linalg.api.ops.random.impl.DropOutInverted;
-import org.nd4j.linalg.factory.Nd4j;
-
-
-/**
- * @author Adam Gibson
- */
-public class Dropout {
-
- private Dropout() {}
-
- /**
- * Apply drop connect to the given variable
- * @param layer the layer with the variables
- * @param variable the variable to apply
- * @return the post applied drop connect
- */
- public static INDArray applyDropConnect(Layer layer, String variable) {
- INDArray result = layer.getParam(variable).dup();
- if (Nd4j.getRandom().getStatePointer() != null) {
- Nd4j.getExecutioner().exec(new DropOut(result, result, layer.conf().getLayer().getDropOut()));
- } else {
- Nd4j.getExecutioner().exec(new LegacyDropOut(result, result, layer.conf().getLayer().getDropOut()));
- }
- return result;
- }
-
- /**
- * Apply dropout to the given input
- * and return the drop out mask used
- * @param input the input to do drop out on
- * @param dropout the drop out probability
- */
- public static void applyDropout(INDArray input, double dropout) {
- if (Nd4j.getRandom().getStatePointer() != null) {
- Nd4j.getExecutioner().exec(new DropOutInverted(input, dropout));
- } else {
- Nd4j.getExecutioner().exec(new LegacyDropOutInverted(input, dropout));
- }
- }
-
-
-}
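
Both helpers in the deleted org.deeplearning4j.util.Dropout class now live behind configuration-level abstractions: drop connect via IWeightNoise (see the getParamWithNoise calls earlier in this diff) and input dropout via IDropout. A configuration sketch; the dropOut(IDropout) overload on the builder is an assumption consistent with the setIDropout/setIdropOut calls above:

    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.dropout.Dropout;

    // Inverted dropout with retain probability 0.5, applied by the layer itself
    // rather than by the removed static Dropout.applyDropout(...) helper.
    NeuralNetConfiguration.Builder b = new NeuralNetConfiguration.Builder()
            .dropOut(new Dropout(0.5));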
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/InputSplit.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/InputSplit.java
index 1c9e3ea86d0c..9b71dbfb48bd 100755
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/InputSplit.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/InputSplit.java
@@ -18,8 +18,8 @@
package org.deeplearning4j.util;
-import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.List;
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/LayerValidation.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/LayerValidation.java
deleted file mode 100644
index e8648f7cc167..000000000000
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/LayerValidation.java
+++ /dev/null
@@ -1,27 +0,0 @@
-package org.deeplearning4j.util;
-
-import org.deeplearning4j.exception.DL4JInvalidConfigException;
-
-/**
- * Created by Alex on 12/11/2016.
- */
-public class LayerValidation {
-
- /**
- * Asserts that the layer nIn and nOut values are set for the layer
- *
- * @param layerType Type of layer ("DenseLayer", etc)
- * @param layerName Name of the layer (may be null if not set)
- * @param layerIndex Index of the layer
- * @param nIn nIn value
- * @param nOut nOut value
- */
- public static void assertNInNOutSet(String layerType, String layerName, int layerIndex, int nIn, int nOut) {
- if (nIn <= 0 || nOut <= 0) {
- if (layerName == null)
- layerName = "(name not set)";
- throw new DL4JInvalidConfigException(layerType + " (index=" + layerIndex + ", name=" + layerName + ") nIn="
- + nIn + ", nOut=" + nOut + "; nIn and nOut must be > 0");
- }
- }
-}
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Viterbi.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Viterbi.java
index bdadc88f4181..0ba3cc53a39b 100755
--- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Viterbi.java
+++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Viterbi.java
@@ -19,9 +19,9 @@
package org.deeplearning4j.util;
import org.apache.commons.math3.util.FastMath;
-import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.io.Serializable;
diff --git a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper-parameter-server/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper-parameter-server/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java
index fbfa08a698d0..c4cdeb05f7f7 100644
--- a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper-parameter-server/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java
+++ b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper-parameter-server/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java
@@ -2,10 +2,8 @@
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
-import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
@@ -17,6 +15,7 @@
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
+import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.lossfunctions.LossFunctions;
/**
@@ -42,11 +41,9 @@ public void testWrapper() throws Exception {
log.info("Build model....");
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations)
- .regularization(true).l2(0.0005).learningRate(0.01)//.biasLearningRate(0.02)
- //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75)
+ .l2(0.0005)
.weightInit(WeightInit.XAVIER)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NESTEROVS)
- .momentum(0.9).list()
+ .updater(new Nesterovs(0.01, 0.9)).list()
.layer(0, new ConvolutionLayer.Builder(5, 5)
//nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied
.nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build())
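
The same mechanical migration applied in this hunk recurs in ParallelWrapperTest and ParallelWrapperMainTest below: the separate learningRate(...), updater(Updater...) and momentum(...) calls collapse into a single IUpdater instance. Taken directly from the hunk above:

    // before
    .learningRate(0.01).updater(Updater.NESTEROVS).momentum(0.9)
    // after
    .updater(new Nesterovs(0.01, 0.9))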
diff --git a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java
index 8abfa0e81bf7..2dd47b3b7fb4 100644
--- a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java
+++ b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java
@@ -3,7 +3,6 @@
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.datavec.api.util.ClassPathResource;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@@ -19,6 +18,7 @@
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.io.File;
import java.util.Arrays;
diff --git a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java
index 65070223b944..edd9b4258100 100644
--- a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java
+++ b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java
@@ -2,10 +2,8 @@
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.eval.Evaluation;
-import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
@@ -19,6 +17,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
+import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -48,11 +47,10 @@ public void testParallelWrapperRun() throws Exception {
log.info("Build model....");
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations)
- .regularization(true).l2(0.0005).learningRate(0.01)//.biasLearningRate(0.02)
+ .l2(0.0005)
//.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75)
.weightInit(WeightInit.XAVIER)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NESTEROVS)
- .momentum(0.9).list()
+ .updater(new Nesterovs(0.01, 0.9)).list()
.layer(0, new ConvolutionLayer.Builder(5, 5)
//nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied
.nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build())
diff --git a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java
index aabf4829e7e6..547279fbca77 100644
--- a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java
+++ b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java
@@ -13,7 +13,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
@@ -22,6 +21,7 @@
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.concurrent.TimeUnit;
@@ -82,7 +82,7 @@ public class TestParallelEarlyStopping {
public void testEarlyStoppingEveryNEpoch() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list()
+ .updater(new Sgd()).weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.pretrain(false).backprop(true).build();
@@ -114,7 +114,7 @@ public void testBadTuning() {
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(1.0) //Intentionally huge LR
+ .updater(new Sgd(1.0)) //Intentionally huge LR
.weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
diff --git a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java
index 2212a1a8bafc..d9a8229a9474 100644
--- a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java
+++ b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java
@@ -12,7 +12,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@@ -23,6 +22,7 @@
import org.junit.Ignore;
import org.junit.Test;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import static org.junit.Assert.assertEquals;
@@ -37,7 +37,7 @@ public void testParallelStatsListenerCompatibility() throws Exception {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list()
+ .updater(new Sgd()).weightInit(WeightInit.XAVIER).list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
.layer(1, new OutputLayer.Builder().nIn(3).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
diff --git a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java
index 2ade1dfcfbcb..1bab6b2b5544 100644
--- a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java
+++ b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java
@@ -2,10 +2,8 @@
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
-import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
@@ -17,6 +15,7 @@
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
+import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.io.File;
@@ -46,11 +45,9 @@ public void runParallelWrapperMain() throws Exception {
log.info("Build model....");
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations)
- .regularization(true).l2(0.0005).learningRate(0.01)//.biasLearningRate(0.02)
- //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75)
+ .l2(0.0005)
.weightInit(WeightInit.XAVIER)
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NESTEROVS)
- .momentum(0.9).list()
+ .updater(new Nesterovs(0.01, 0.9)).list()
.layer(0, new ConvolutionLayer.Builder(5, 5)
//nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied
.nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build())
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/AutoEncoderNetworkTest.java b/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/AutoEncoderNetworkTest.java
index 904dee8549ef..b2e8bc66102c 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/AutoEncoderNetworkTest.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/AutoEncoderNetworkTest.java
@@ -20,6 +20,8 @@
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
+import org.nd4j.linalg.activations.Activation;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.io.File;
@@ -80,7 +82,7 @@ public void closeIt() {
}
private MultiLayerConfiguration getNNConfiguration() {
- return new NeuralNetConfiguration.Builder().seed(12345).iterations(5).learningRate(.1)
+ return new NeuralNetConfiguration.Builder().seed(12345).iterations(5).updater(new Sgd(0.1))
.optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list()
.layer(0, new RBM.Builder().nIn(10).nOut(8)
.lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build())
@@ -92,7 +94,7 @@ private MultiLayerConfiguration getNNConfiguration() {
.lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build())
.layer(4, new RBM.Builder().nIn(5).nOut(8)
.lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build()) //decoding starts
- .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation("sigmoid").nIn(8)
+ .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nIn(8)
.nOut(10).build())
.pretrain(true).backprop(true).build();
}
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/SparkDl4jNetworkTest.java b/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/SparkDl4jNetworkTest.java
index 033d819ca33a..b71e814d54b8 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/SparkDl4jNetworkTest.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/SparkDl4jNetworkTest.java
@@ -11,7 +11,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@@ -24,6 +23,8 @@
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
+import org.nd4j.linalg.activations.Activation;
+import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.io.File;
@@ -95,12 +96,12 @@ public void closeIt() {
private MultiLayerConfiguration getNNConfiguration() {
return new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(10)
- .weightInit(WeightInit.UNIFORM).learningRate(0.1).updater(Updater.NESTEROVS).list()
+ .weightInit(WeightInit.UNIFORM).updater(new Nesterovs(0.1)).list()
.layer(0, new DenseLayer.Builder().nIn(2).nOut(100).weightInit(WeightInit.XAVIER)
- .activation("relu").build())
+ .activation(Activation.RELU).build())
.layer(1, new DenseLayer.Builder().nIn(100).nOut(120).weightInit(WeightInit.XAVIER)
- .activation("relu").build())
- .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation("softmax").nIn(120)
+ .activation(Activation.RELU).build())
+ .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.SOFTMAX).nIn(120)
.nOut(2).build())
.pretrain(false).backprop(true).build();
}
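Aside: the second recurring change is replacing string activation names with the type-safe Activation enum, so a typo fails at compile time rather than when the string is parsed. A hedged sketch under the same import the patch adds (layer sizes illustrative):

import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.nd4j.linalg.activations.Activation;

public class ActivationMigrationSketch {
    public static void main(String[] args) {
        // Before: .activation("relu") -- resolved from the string at runtime.
        // After: the enum constant is checked by the compiler.
        DenseLayer layer = new DenseLayer.Builder().nIn(2).nOut(100)
                .activation(Activation.RELU)
                .build();
        System.out.println(layer);
    }
}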
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectors.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectors.java
index 847bd2508261..5f59142c1a8b 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectors.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectors.java
@@ -7,8 +7,6 @@
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.storage.StorageLevel;
-import org.nd4j.linalg.primitives.Counter;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.exception.DL4JInvalidConfigException;
import org.deeplearning4j.models.embeddings.loader.VectorsConfiguration;
import org.deeplearning4j.models.sequencevectors.SequenceVectors;
@@ -24,6 +22,8 @@
import org.deeplearning4j.spark.models.sequencevectors.learning.SparkElementsLearningAlgorithm;
import org.deeplearning4j.spark.models.sequencevectors.learning.SparkSequenceLearningAlgorithm;
import org.deeplearning4j.spark.models.sequencevectors.primitives.ExtraCounter;
+import org.nd4j.linalg.primitives.Counter;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.parameterserver.distributed.VoidParameterServer;
import org.nd4j.parameterserver.distributed.conf.VoidConfiguration;
import org.nd4j.parameterserver.distributed.enums.FaultToleranceStrategy;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/export/impl/VocabCacheExporter.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/export/impl/VocabCacheExporter.java
index 92723139622c..aa70f44eefb2 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/export/impl/VocabCacheExporter.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/export/impl/VocabCacheExporter.java
@@ -3,7 +3,6 @@
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.spark.api.java.JavaRDD;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable;
import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer;
import org.deeplearning4j.models.word2vec.VocabWord;
@@ -14,6 +13,7 @@
import org.deeplearning4j.spark.models.sequencevectors.export.SparkModelExporter;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.util.List;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/CountFunction.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/CountFunction.java
index 2fbb2077b6d6..fc8dbbe8bcea 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/CountFunction.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/CountFunction.java
@@ -5,12 +5,12 @@
import org.apache.spark.Accumulator;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.broadcast.Broadcast;
-import org.nd4j.linalg.primitives.Counter;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.embeddings.loader.VectorsConfiguration;
import org.deeplearning4j.models.sequencevectors.sequence.Sequence;
import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement;
import org.deeplearning4j.spark.models.sequencevectors.learning.SparkElementsLearningAlgorithm;
+import org.nd4j.linalg.primitives.Counter;
+import org.nd4j.linalg.primitives.Pair;
import org.nd4j.parameterserver.distributed.VoidParameterServer;
import org.nd4j.parameterserver.distributed.conf.VoidConfiguration;
import org.nd4j.parameterserver.distributed.messages.TrainingMessage;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/ExtraCountFunction.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/ExtraCountFunction.java
index 691119683977..1de7521200cc 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/ExtraCountFunction.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/ExtraCountFunction.java
@@ -3,10 +3,10 @@
import lombok.NonNull;
import org.apache.spark.Accumulator;
import org.apache.spark.api.java.function.Function;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.sequencevectors.sequence.Sequence;
import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement;
import org.deeplearning4j.spark.models.sequencevectors.primitives.ExtraCounter;
+import org.nd4j.linalg.primitives.Pair;
/**
* This accumulator function counts individual elements, using the provided Accumulator
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/test/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectorsTest.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/test/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectorsTest.java
index 136a59d3d71b..37d2f0344068 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/test/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectorsTest.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/test/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectorsTest.java
@@ -3,7 +3,6 @@
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
-import org.nd4j.linalg.primitives.Counter;
import org.deeplearning4j.models.sequencevectors.sequence.Sequence;
import org.deeplearning4j.models.sequencevectors.sequence.ShallowSequenceElement;
import org.deeplearning4j.models.word2vec.VocabWord;
@@ -11,6 +10,7 @@
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
+import org.nd4j.linalg.primitives.Counter;
import java.util.ArrayList;
import java.util.List;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/Glove.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/Glove.java
index 6b25d5eabc1e..b70219e589d6 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/Glove.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/Glove.java
@@ -26,9 +26,6 @@
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.broadcast.Broadcast;
-import org.nd4j.linalg.primitives.CounterMap;
-import org.nd4j.linalg.primitives.Pair;
-import org.nd4j.linalg.primitives.Triple;
import org.deeplearning4j.models.glove.GloveWeightLookupTable;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
@@ -39,6 +36,9 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.legacy.AdaGrad;
+import org.nd4j.linalg.primitives.CounterMap;
+import org.nd4j.linalg.primitives.Pair;
+import org.nd4j.linalg.primitives.Triple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/GloveParam.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/GloveParam.java
index a1e7c92b53c6..a2585282defa 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/GloveParam.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/GloveParam.java
@@ -19,8 +19,8 @@
package org.deeplearning4j.spark.models.embeddings.glove;
import org.apache.spark.broadcast.Broadcast;
-import org.nd4j.linalg.primitives.CounterMap;
import org.nd4j.linalg.api.rng.Random;
+import org.nd4j.linalg.primitives.CounterMap;
import java.io.Serializable;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/cooccurrences/CoOccurrenceCalculator.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/cooccurrences/CoOccurrenceCalculator.java
index ae76517cc7bd..fa5650ca776d 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/cooccurrences/CoOccurrenceCalculator.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/cooccurrences/CoOccurrenceCalculator.java
@@ -20,11 +20,11 @@
import org.apache.spark.api.java.function.Function;
import org.apache.spark.broadcast.Broadcast;
-import org.nd4j.linalg.primitives.CounterMap;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.CounterMap;
+import org.nd4j.linalg.primitives.Pair;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/MapToPairFunction.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/MapToPairFunction.java
index 52ee21574336..126677fdeafb 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/MapToPairFunction.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/MapToPairFunction.java
@@ -1,9 +1,9 @@
package org.deeplearning4j.spark.models.embeddings.word2vec;
import org.apache.spark.api.java.function.Function;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Map;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2Vec.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2Vec.java
index 3bb5e58c58be..2387c81bb085 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2Vec.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2Vec.java
@@ -26,7 +26,6 @@
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.broadcast.Broadcast;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable;
import org.deeplearning4j.models.embeddings.loader.VectorsConfiguration;
import org.deeplearning4j.models.embeddings.wordvectors.WordVectorsImpl;
@@ -41,6 +40,7 @@
import org.nd4j.linalg.heartbeat.reports.Environment;
import org.nd4j.linalg.heartbeat.reports.Event;
import org.nd4j.linalg.heartbeat.utils.EnvironmentUtils;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformer.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformer.java
index ec32ac666e74..737395fa709a 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformer.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformer.java
@@ -22,12 +22,12 @@
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.broadcast.Broadcast;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformerVoid.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformerVoid.java
index 65e564e3d63a..c0c57469ba52 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformerVoid.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformerVoid.java
@@ -22,12 +22,12 @@
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.broadcast.Broadcast;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/CountCumSum.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/CountCumSum.java
index 7dd9bcad8a75..b446bdce330b 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/CountCumSum.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/CountCumSum.java
@@ -4,8 +4,8 @@
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;
-import org.nd4j.linalg.primitives.Counter;
import org.deeplearning4j.spark.text.accumulators.MaxPerPartitionAccumulator;
+import org.nd4j.linalg.primitives.Counter;
import java.util.concurrent.atomic.AtomicLong;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/TextPipeline.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/TextPipeline.java
index e0e28678edc1..68655ab343f0 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/TextPipeline.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/TextPipeline.java
@@ -22,15 +22,15 @@
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;
-import org.nd4j.linalg.primitives.AtomicDouble;
-import org.nd4j.linalg.primitives.Counter;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.embeddings.loader.VectorsConfiguration;
import org.deeplearning4j.models.word2vec.Huffman;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.models.word2vec.wordstore.inmemory.AbstractCache;
import org.deeplearning4j.spark.text.accumulators.WordFreqAccumulator;
+import org.nd4j.linalg.primitives.AtomicDouble;
+import org.nd4j.linalg.primitives.Counter;
+import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.List;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/WordsListToVocabWordsFunction.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/WordsListToVocabWordsFunction.java
index 82db989ec556..8da5bedb9459 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/WordsListToVocabWordsFunction.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/WordsListToVocabWordsFunction.java
@@ -2,9 +2,9 @@
import org.apache.spark.api.java.function.Function;
import org.apache.spark.broadcast.Broadcast;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
+import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.List;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/models/embeddings/glove/GloveTest.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/models/embeddings/glove/GloveTest.java
index 5ba5e5a550a1..18c9c870be2e 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/models/embeddings/glove/GloveTest.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/models/embeddings/glove/GloveTest.java
@@ -21,7 +21,6 @@
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.datavec.api.util.ClassPathResource;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable;
import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer;
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors;
@@ -31,6 +30,7 @@
import org.deeplearning4j.spark.text.BaseSparkTest;
import org.junit.Ignore;
import org.junit.Test;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Collection;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/text/TextPipelineTest.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/text/TextPipelineTest.java
index ebcebb50e8be..7093d667be1c 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/text/TextPipelineTest.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/text/TextPipelineTest.java
@@ -23,8 +23,6 @@
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;
-import org.nd4j.linalg.primitives.Counter;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.models.word2vec.Huffman;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
@@ -38,6 +36,8 @@
import org.junit.Before;
import org.junit.Test;
import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Counter;
+import org.nd4j.linalg.primitives.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingWorker.java b/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingWorker.java
index 21a46fa01678..3049f6313383 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingWorker.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingWorker.java
@@ -2,7 +2,6 @@
import lombok.Getter;
import org.apache.spark.broadcast.Broadcast;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.graph.ComputationGraph;
@@ -16,6 +15,7 @@
import org.deeplearning4j.spark.parameterserver.conf.SharedTrainingConfiguration;
import org.nd4j.linalg.dataset.api.DataSet;
import org.nd4j.linalg.dataset.api.MultiDataSet;
+import org.nd4j.linalg.primitives.Pair;
/**
* @author raver119@gmail.com
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/TrainingWorker.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/TrainingWorker.java
index 50a0385970f6..a026471dd541 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/TrainingWorker.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/TrainingWorker.java
@@ -1,11 +1,11 @@
package org.deeplearning4j.spark.api;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.spark.api.stats.SparkTrainingStats;
import org.nd4j.linalg.dataset.api.DataSet;
import org.nd4j.linalg.dataset.api.MultiDataSet;
+import org.nd4j.linalg.primitives.Pair;
import java.io.Serializable;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerFlatMap.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerFlatMap.java
index c10c7459c763..47d26d01f897 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerFlatMap.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerFlatMap.java
@@ -2,7 +2,6 @@
import org.datavec.spark.functions.FlatMapFunctionAdapter;
import org.datavec.spark.transform.BaseFlatMapFunctionAdaptee;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.datasets.iterator.AsyncDataSetIterator;
import org.deeplearning4j.datasets.iterator.IteratorDataSetIterator;
import org.deeplearning4j.nn.graph.ComputationGraph;
@@ -15,6 +14,7 @@
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Collections;
import java.util.Iterator;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerMultiDataSetFlatMap.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerMultiDataSetFlatMap.java
index c71b38d29023..8ec7c6fd6bee 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerMultiDataSetFlatMap.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerMultiDataSetFlatMap.java
@@ -2,7 +2,6 @@
import org.datavec.spark.functions.FlatMapFunctionAdapter;
import org.datavec.spark.transform.BaseFlatMapFunctionAdaptee;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.datasets.iterator.AsyncMultiDataSetIterator;
import org.deeplearning4j.datasets.iterator.IteratorMultiDataSetIterator;
import org.deeplearning4j.nn.graph.ComputationGraph;
@@ -14,6 +13,7 @@
import org.nd4j.linalg.dataset.api.MultiDataSet;
import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.util.Collections;
import java.util.Iterator;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportDataSetsFunction.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportDataSetsFunction.java
index 4d7f10c17f3a..cc8ec4c88a89 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportDataSetsFunction.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportDataSetsFunction.java
@@ -21,9 +21,9 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.function.Function2;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.util.UIDProvider;
import org.nd4j.linalg.dataset.DataSet;
+import org.nd4j.linalg.primitives.Pair;
import java.net.URI;
import java.util.*;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportMultiDataSetsFunction.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportMultiDataSetsFunction.java
index 7c2f6b7782b1..4631a10c7816 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportMultiDataSetsFunction.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportMultiDataSetsFunction.java
@@ -21,9 +21,9 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.function.Function2;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.util.UIDProvider;
import org.nd4j.linalg.dataset.api.MultiDataSet;
+import org.nd4j.linalg.primitives.Pair;
import java.net.URI;
import java.util.*;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java
index 17833cc789ea..61ebc252adf1 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java
@@ -6,7 +6,6 @@
import org.deeplearning4j.api.storage.StatsStorageRouterProvider;
import org.deeplearning4j.api.storage.StorageMetaData;
import org.deeplearning4j.api.storage.listener.RoutingIterationListener;
-import org.nd4j.linalg.primitives.Pair;
import org.deeplearning4j.nn.api.Model;
import org.deeplearning4j.nn.api.Updater;
import org.deeplearning4j.nn.graph.ComputationGraph;
@@ -26,6 +25,7 @@
import org.nd4j.linalg.dataset.api.DataSet;
import org.nd4j.linalg.dataset.api.MultiDataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.Collection;
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java
index f5831620a410..d5cfc89ccdee 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java
@@ -23,7 +23,6 @@
import org.apache.spark.api.java.JavaSparkContext;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.spark.impl.multilayer.SparkDl4jMultiLayer;
import org.deeplearning4j.spark.impl.paramavg.ParameterAveragingTrainingMaster;
import org.junit.After;
@@ -32,6 +31,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.io.Serializable;
@@ -121,8 +121,8 @@ protected int numExecutors() {
}
protected MultiLayerConfiguration getBasicConf() {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123).updater(Updater.NESTEROVS)
- .learningRate(0.1).momentum(0.9).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123)
+ .updater(new Nesterovs(0.1, 0.9)).list()
.layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3)
.activation(Activation.TANH).build())
.layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java
index bc3ed892e964..097cf2b44018 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java
@@ -16,7 +16,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
@@ -28,6 +27,7 @@
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -45,7 +45,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest {
public void testEarlyStoppingIris() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list()
+ .updater(new Sgd()).weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.pretrain(false).backprop(true).build();
@@ -97,7 +97,7 @@ public void testBadTuning() {
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(10.0) //Intentionally huge LR
+ .updater(new Sgd(10.0)) //Intentionally huge LR
.weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.IDENTITY)
.lossFunction(LossFunctions.LossFunction.MSE).build())
@@ -134,7 +134,7 @@ public void testTimeTermination() {
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(1e-6).weightInit(WeightInit.XAVIER).list()
+ .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.pretrain(false).backprop(true).build();
@@ -177,7 +177,7 @@ public void testNoImprovementNEpochsTermination() {
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(0.0).weightInit(WeightInit.XAVIER).list()
+ .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.pretrain(false).backprop(true).build();
@@ -210,7 +210,7 @@ public void testNoImprovementNEpochsTermination() {
public void testListeners() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list()
+ .updater(new Sgd()).weightInit(WeightInit.XAVIER).list()
.layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.pretrain(false).backprop(true).build();
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java
index 44c67dc1e9a1..bcbb8b594914 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java
@@ -34,7 +34,6 @@
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.weights.WeightInit;
@@ -48,6 +47,7 @@
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -66,7 +66,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest {
public void testEarlyStoppingIris() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
+ .updater(new Sgd()).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
.addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in")
.setOutputs("0").pretrain(false).backprop(true).build();
@@ -116,7 +116,7 @@ public void testBadTuning() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(2.0) //Intentionally huge LR
+ .updater(new Sgd(2.0)) //Intentionally huge LR
.weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
.addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.IDENTITY)
.lossFunction(LossFunctions.LossFunction.MSE).build(), "in")
@@ -154,7 +154,7 @@ public void testTimeTermination() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(1e-6).weightInit(WeightInit.XAVIER).graphBuilder()
+ .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).graphBuilder()
.addInputs("in")
.addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in")
@@ -199,7 +199,7 @@ public void testNoImprovementNEpochsTermination() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).learningRate(0.0).weightInit(WeightInit.XAVIER).graphBuilder()
+ .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).graphBuilder()
.addInputs("in")
.addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in")
@@ -235,7 +235,7 @@ public void testNoImprovementNEpochsTermination() {
public void testListeners() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
- .updater(Updater.SGD).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
+ .updater(new Sgd()).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
.addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in")
.setOutputs("0").pretrain(false).backprop(true).build();
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java
index fd224beb6330..4ca1d244206b 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java
@@ -2,7 +2,9 @@
import org.apache.spark.serializer.SerializerInstance;
import org.deeplearning4j.eval.*;
-import org.deeplearning4j.nn.conf.*;
+import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
+import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.graph.*;
import org.deeplearning4j.nn.conf.graph.rnn.DuplicateToTimeSeriesVertex;
@@ -13,6 +15,10 @@
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Adam;
+import org.nd4j.linalg.learning.config.Nadam;
+import org.nd4j.linalg.schedule.MapSchedule;
+import org.nd4j.linalg.schedule.ScheduleType;
import scala.collection.JavaConversions;
import java.nio.ByteBuffer;
@@ -29,7 +35,7 @@ public class TestKryo extends BaseSparkKryoTest {
private <T> void testSerialization(T in, SerializerInstance si) {
ByteBuffer bb = si.serialize(in, null);
- T deserialized = si.deserialize(bb, null);
+ T deserialized = (T)si.deserialize(bb, null);
assertEquals(in, deserialized);
}
@@ -41,19 +47,19 @@ public void testSerializationConfigurations() {
//Check network configurations:
Map<Integer, Double> m = new HashMap<>();
+ m.put(0, 0.5);
m.put(10, 0.1);
- MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().learningRate(0.2)
- .learningRateDecayPolicy(LearningRatePolicy.Schedule).learningRateSchedule(m)
- .updater(Updater.NADAM).list().layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build())
+ MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder()
+ .updater(new Nadam(new MapSchedule(ScheduleType.ITERATION,m))).list().layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build())
.build();
testSerialization(mlc, si);
ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.DISTRIBUTION)
- .dist(new UniformDistribution(-1, 1)).learningRate(0.2)
- .learningRateDecayPolicy(LearningRatePolicy.Schedule)
- .learningRateSchedule(Collections.singletonMap(10, 0.1)).updater(Updater.ADAM).graphBuilder()
+ .dist(new UniformDistribution(-1, 1))
+ .updater(new Adam(new MapSchedule(ScheduleType.ITERATION,m)))
+ .graphBuilder()
.addInputs("in").addLayer("out", new OutputLayer.Builder().nIn(10).nOut(10).build(), "in")
.setOutputs("out").build();
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java
index bbb17ced8844..86fe6b65da47 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java
@@ -29,6 +29,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.ArrayList;
@@ -45,7 +46,7 @@ public void testSparkWithCustomLayer() {
//Basic test - checks whether exceptions etc are thrown with custom layers + spark
//Custom layers are tested more extensively in dl4j core
MultiLayerConfiguration conf =
- new NeuralNetConfiguration.Builder().learningRate(0.1).list()
+ new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)).list()
.layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
.layer(1, new CustomLayer(3.14159)).layer(2,
new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java
index f9e92b0e3a4d..1270d00474dd 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java
@@ -36,6 +36,8 @@
import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.io.ClassPathResource;
+import org.nd4j.linalg.learning.config.Nesterovs;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import scala.Tuple2;
@@ -76,7 +78,7 @@ public void testBasic() throws Exception {
list.add(iter.next());
ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.1)
+ .updater(new Sgd(0.1))
.graphBuilder().addInputs("in")
.addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out",
new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3)
@@ -109,8 +111,8 @@ public void testBasic() throws Exception {
@Test
public void testDistributedScoring() {
- ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l1(0.1).l2(0.1)
- .seed(123).updater(Updater.NESTEROVS).learningRate(0.1).momentum(0.9).graphBuilder()
+ ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.1)
+ .seed(123).updater(new Nesterovs(0.1, 0.9)).graphBuilder()
.addInputs("in")
.addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3)
.activation(Activation.TANH).build(), "in")
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/layer/TestSparkLayer.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/layer/TestSparkLayer.java
index b47394840bcb..817d91e440f2 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/layer/TestSparkLayer.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/layer/TestSparkLayer.java
@@ -30,6 +30,7 @@
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.dataset.DataSet;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.List;
@@ -44,7 +45,7 @@ public class TestSparkLayer extends BaseSparkTest {
public void testIris2() throws Exception {
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(10)
- .learningRate(1e-1)
+ .updater(new Sgd(0.1))
.layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
LossFunctions.LossFunction.MCXENT).nIn(4).nOut(3).weightInit(WeightInit.XAVIER)
.activation(Activation.SOFTMAX).build())
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java
index 93051aafecb0..4dc11ec778df 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java
@@ -2,7 +2,6 @@
import org.apache.spark.api.java.JavaRDD;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.layers.FrozenLayer;
@@ -19,6 +18,7 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.ArrayList;
@@ -36,10 +36,10 @@ public class TestFrozenLayers extends BaseSparkTest {
@Test
public void testSparkFrozenLayers() {
- NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1)
- .updater(Updater.SGD).activation(Activation.TANH);
+ NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
+ .activation(Activation.TANH);
- FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().learningRate(0.1).build();
+ FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build();
int nIn = 6;
int nOut = 3;
@@ -114,13 +114,13 @@ public void testSparkFrozenLayers() {
@Test
public void testSparkFrozenLayersCompGraph() {
- FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().learningRate(0.1).build();
+ FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build();
int nIn = 6;
int nOut = 3;
- ComputationGraph origModel = new ComputationGraph(new NeuralNetConfiguration.Builder().learningRate(0.1)
- .updater(Updater.SGD).activation(Activation.TANH).graphBuilder().addInputs("in")
+ ComputationGraph origModel = new ComputationGraph(new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
+ .activation(Activation.TANH).graphBuilder().addInputs("in")
.addLayer("0", new DenseLayer.Builder().nIn(6).nOut(5).build(), "in")
.addLayer("1", new DenseLayer.Builder().nIn(5).nOut(4).build(), "0")
.addLayer("2", new DenseLayer.Builder().nIn(4).nOut(3).build(), "1")
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java
index 3231f4a7a1d3..fba7288f2c86 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java
@@ -7,7 +7,6 @@
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
@@ -25,6 +24,9 @@
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.IUpdater;
+import org.nd4j.linalg.learning.config.RmsProp;
+import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.ArrayList;
@@ -48,10 +50,10 @@ public void setUp() {
}
- private static MultiLayerConfiguration getConf(int seed, Updater updater) {
+ private static MultiLayerConfiguration getConf(int seed, IUpdater updater) {
Nd4j.getRandom().setSeed(seed);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.5)
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.XAVIER).updater(updater).iterations(1).seed(seed).list()
.layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder()
.lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).build())
@@ -59,10 +61,10 @@ private static MultiLayerConfiguration getConf(int seed, Updater updater) {
return conf;
}
- private static MultiLayerConfiguration getConfCNN(int seed, Updater updater) {
+ private static MultiLayerConfiguration getConfCNN(int seed, IUpdater updater) {
Nd4j.getRandom().setSeed(seed);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.5)
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.XAVIER).updater(updater).iterations(1).seed(seed).list()
.layer(0, new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0)
.activation(Activation.TANH).build())
@@ -74,10 +76,10 @@ private static MultiLayerConfiguration getConfCNN(int seed, Updater updater) {
return conf;
}
- private static ComputationGraphConfiguration getGraphConf(int seed, Updater updater) {
+ private static ComputationGraphConfiguration getGraphConf(int seed, IUpdater updater) {
Nd4j.getRandom().setSeed(seed);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.5)
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.XAVIER).updater(updater).iterations(1).seed(seed).graphBuilder()
.addInputs("in")
.addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("1",
@@ -88,10 +90,10 @@ private static ComputationGraphConfiguration getGraphConf(int seed, Updater updater) {
return conf;
}
- private static ComputationGraphConfiguration getGraphConfCNN(int seed, Updater updater) {
+ private static ComputationGraphConfiguration getGraphConfCNN(int seed, IUpdater updater) {
Nd4j.getRandom().setSeed(seed);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.5)
+ .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.XAVIER).updater(updater).iterations(1).seed(seed).graphBuilder()
.addInputs("in")
.addLayer("0", new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1)
@@ -171,7 +173,7 @@ public void testOneExecutor() {
//Do training locally, for 3 minibatches
int[] seeds = {1, 2, 3};
- MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, Updater.RMSPROP));
+ MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, new RmsProp(0.5)));
net.init();
INDArray initialParams = net.params().dup();
@@ -185,7 +187,7 @@ public void testOneExecutor() {
//Do training on Spark with one executor, for 3 separate minibatches
TrainingMaster tm = getTrainingMaster(1, miniBatchSize, saveUpdater);
- SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, Updater.RMSPROP), tm);
+ SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, new RmsProp(0.5)), tm);
sparkNet.setCollectTrainingStats(true);
INDArray initialSparkParams = sparkNet.getNetwork().params().dup();
@@ -221,7 +223,7 @@ public void testOneExecutorGraph() {
//Do training locally, for 3 minibatches
int[] seeds = {1, 2, 3};
- ComputationGraph net = new ComputationGraph(getGraphConf(12345, Updater.RMSPROP));
+ ComputationGraph net = new ComputationGraph(getGraphConf(12345, new RmsProp(0.5)));
net.init();
INDArray initialParams = net.params().dup();
@@ -236,7 +238,7 @@ public void testOneExecutorGraph() {
//Do training on Spark with one executor, for 3 separate minibatches
TrainingMaster tm = getTrainingMaster(1, miniBatchSize, saveUpdater);
SparkComputationGraph sparkNet =
- new SparkComputationGraph(sc, getGraphConf(12345, Updater.RMSPROP), tm);
+ new SparkComputationGraph(sc, getGraphConf(12345, new RmsProp(0.5)), tm);
sparkNet.setCollectTrainingStats(true);
INDArray initialSparkParams = sparkNet.getNetwork().params().dup();
@@ -280,7 +282,7 @@ public void testAverageEveryStep() {
// CudaGridExecutioner executioner = (CudaGridExecutioner) Nd4j.getExecutioner();
- MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, Updater.SGD));
+ MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, new Sgd(0.5)));
net.init();
INDArray initialParams = net.params().dup();
// executioner.addToWatchdog(initialParams, "initialParams");
@@ -301,7 +303,7 @@ public void testAverageEveryStep() {
.saveUpdater(saveUpdater).workerPrefetchNumBatches(0)
// .rddTrainingApproach(RDDTrainingApproach.Direct)
.rddTrainingApproach(RDDTrainingApproach.Export).build();
- SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, Updater.SGD), tm);
+ SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, new Sgd(0.5)), tm);
sparkNet.setCollectTrainingStats(true);
INDArray initialSparkParams = sparkNet.getNetwork().params().dup();
@@ -357,7 +359,7 @@ public void testAverageEveryStepCNN() {
//Do training locally, for 3 minibatches
int[] seeds = {1, 2, 3};
- MultiLayerNetwork net = new MultiLayerNetwork(getConfCNN(12345, Updater.SGD));
+ MultiLayerNetwork net = new MultiLayerNetwork(getConfCNN(12345, new Sgd(0.5)));
net.init();
INDArray initialParams = net.params().dup();
@@ -374,7 +376,7 @@ public void testAverageEveryStepCNN() {
.averagingFrequency(1).batchSizePerWorker(miniBatchSizePerWorker)
.saveUpdater(saveUpdater).workerPrefetchNumBatches(0)
.rddTrainingApproach(RDDTrainingApproach.Export).build();
- SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConfCNN(12345, Updater.SGD), tm);
+ SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConfCNN(12345, new Sgd(0.5)), tm);
sparkNet.setCollectTrainingStats(true);
INDArray initialSparkParams = sparkNet.getNetwork().params().dup();
@@ -430,7 +432,7 @@ public void testAverageEveryStepGraph() {
// CudaGridExecutioner executioner = (CudaGridExecutioner) Nd4j.getExecutioner();
- ComputationGraph net = new ComputationGraph(getGraphConf(12345, Updater.SGD));
+ ComputationGraph net = new ComputationGraph(getGraphConf(12345, new Sgd(0.5)));
net.init();
INDArray initialParams = net.params().dup();
// executioner.addToWatchdog(initialParams, "initialParams");
@@ -446,7 +448,7 @@ public void testAverageEveryStepGraph() {
//Do training on Spark with one executor, for 3 separate minibatches
TrainingMaster tm = getTrainingMaster(1, miniBatchSizePerWorker, saveUpdater);
- SparkComputationGraph sparkNet = new SparkComputationGraph(sc, getGraphConf(12345, Updater.SGD), tm);
+ SparkComputationGraph sparkNet = new SparkComputationGraph(sc, getGraphConf(12345, new Sgd(0.5)), tm);
sparkNet.setCollectTrainingStats(true);
INDArray initialSparkParams = sparkNet.getNetwork().params().dup();
@@ -506,7 +508,7 @@ public void testAverageEveryStepGraphCNN() {
//Do training locally, for 3 minibatches
int[] seeds = {1, 2, 3};
- ComputationGraph net = new ComputationGraph(getGraphConfCNN(12345, Updater.SGD));
+ ComputationGraph net = new ComputationGraph(getGraphConfCNN(12345, new Sgd(0.5)));
net.init();
INDArray initialParams = net.params().dup();
@@ -520,7 +522,7 @@ public void testAverageEveryStepGraphCNN() {
//Do training on Spark with one executor, for 3 separate minibatches
TrainingMaster tm = getTrainingMaster(1, miniBatchSizePerWorker, saveUpdater);
- SparkComputationGraph sparkNet = new SparkComputationGraph(sc, getGraphConfCNN(12345, Updater.SGD), tm);
+ SparkComputationGraph sparkNet = new SparkComputationGraph(sc, getGraphConfCNN(12345, new Sgd(0.5)), tm);
sparkNet.setCollectTrainingStats(true);
INDArray initialSparkParams = sparkNet.getNetwork().params().dup();
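
Aside: the hunks above all follow one migration pattern, so it is worth spelling out once. The deprecated org.deeplearning4j.nn.conf.Updater enum and the builder-level .learningRate(...) call are replaced by an org.nd4j.linalg.learning.config.IUpdater instance that carries its own hyperparameters; new RmsProp(0.5) and new Sgd(0.5) reproduce the removed learningRate(0.5). A minimal sketch of the before/after, using only the nd4j updater-config classes this patch already imports (the helper class is illustrative, not part of the patch):

    import org.nd4j.linalg.learning.config.IUpdater;
    import org.nd4j.linalg.learning.config.RmsProp;
    import org.nd4j.linalg.learning.config.Sgd;

    // Illustrative only, not part of the patch.
    class UpdaterMigrationSketch {
        // Old style: .updater(Updater.RMSPROP).learningRate(0.5) on the builder.
        // New style: pass an IUpdater instance that owns the learning rate.
        static IUpdater rmsProp() {
            return new RmsProp(0.5); // 0.5 mirrors the removed learningRate(0.5)
        }

        static IUpdater sgd() {
            return new Sgd(0.5);
        }
    }

These instances are then handed to NeuralNetConfiguration.Builder.updater(...), as in getConf and getConfCNN above.
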
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java
index f570618a11e4..e34761287d59 100644
--- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java
+++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java
@@ -39,14 +39,12 @@
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.BaseLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.RBM;
import org.deeplearning4j.nn.conf.layers.variational.GaussianReconstructionDistribution;
import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder;
-import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
@@ -65,6 +63,9 @@
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.io.ClassPathResource;
+import org.nd4j.linalg.learning.config.IUpdater;
+import org.nd4j.linalg.learning.config.Nesterovs;
+import org.nd4j.linalg.learning.config.RmsProp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import scala.Tuple2;
@@ -196,14 +197,14 @@ public void testUpdaters() {
MultiLayerNetwork netCopy = sparkNet.getNetwork().clone();
netCopy.fit(data);
- Updater expectedUpdater = ((BaseLayer) netCopy.conf().getLayer()).getUpdater();
- double expectedLR = ((BaseLayer) netCopy.conf().getLayer()).getLearningRate();
- double expectedMomentum = ((BaseLayer) netCopy.conf().getLayer()).getMomentum();
+ IUpdater expectedUpdater = ((BaseLayer) netCopy.conf().getLayer()).getIUpdater();
+ double expectedLR = ((Nesterovs)((BaseLayer) netCopy.conf().getLayer()).getIUpdater()).getLearningRate();
+ double expectedMomentum = ((Nesterovs)((BaseLayer) netCopy.conf().getLayer()).getIUpdater()).getMomentum();
- Updater actualUpdater = ((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getUpdater();
+ IUpdater actualUpdater = ((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getIUpdater();
sparkNet.fit(sparkData);
- double actualLR = ((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getLearningRate();
- double actualMomentum = ((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getMomentum();
+ double actualLR = ((Nesterovs)((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getIUpdater()).getLearningRate();
+ double actualMomentum = ((Nesterovs)((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getIUpdater()).getMomentum();
assertEquals(expectedUpdater, actualUpdater);
assertEquals(expectedLR, actualLR, 0.01);
@@ -248,7 +249,7 @@ public void testSmallAmountOfData() {
//Idea: Test spark training where some executors don't get any data
//in this case: by having fewer examples (2 DataSets) than executors (local[*])
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp())
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).list()
.layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3)
.activation(Activation.TANH).build())
@@ -273,8 +274,8 @@ public void testSmallAmountOfData() {
@Test
public void testDistributedScoring() {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l1(0.1).l2(0.1)
- .seed(123).updater(Updater.NESTEROVS).learningRate(0.1).momentum(0.9).list()
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.1)
+ .seed(123).updater(new Nesterovs(0.1, 0.9)).list()
.layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3)
.activation(Activation.TANH).build())
.layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
@@ -358,7 +359,7 @@ public void testParameterAveragingMultipleExamplesPerDataSet() throws Exception
list.add(iter.next());
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp())
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).list()
.layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50)
.activation(Activation.TANH).build())
@@ -419,7 +420,7 @@ public void testFitViaStringPaths() throws Exception {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp())
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).list()
.layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50)
.activation(Activation.TANH).build())
@@ -485,7 +486,7 @@ public void testFitViaStringPathsSize1() throws Exception {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp())
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).list()
.layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50)
.activation(Activation.TANH).build())
@@ -569,7 +570,7 @@ public void testFitViaStringPathsCompGraph() throws Exception {
- ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
+ ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp())
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
.graphBuilder().addInputs("in")
.addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50)
@@ -636,7 +637,7 @@ public void testFitViaStringPathsCompGraph() throws Exception {
@Test
public void testSeedRepeatability() throws Exception {
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.RMSPROP)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new RmsProp())
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
.weightInit(WeightInit.XAVIER).list()
.layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(4)
@@ -715,7 +716,7 @@ public void testIterationCounts() throws Exception {
list.add(iter.next());
}
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp())
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).list()
.layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50)
.activation(Activation.TANH).build())
@@ -761,7 +762,7 @@ public void testIterationCountsGraph() throws Exception {
list.add(iter.next());
}
- ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
+ ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp())
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
.graphBuilder().addInputs("in")
.addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50)
@@ -801,11 +802,11 @@ public void testVaePretrainSimple() {
int nIn = 8;
Nd4j.getRandom().setSeed(12345);
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.RMSPROP)
+ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new RmsProp())
.weightInit(WeightInit.XAVIER).list()
.layer(0, new VariationalAutoencoder.Builder().nIn(8).nOut(10).encoderLayerSizes(12)
.decoderLayerSizes(13).reconstructionDistribution(
- new GaussianReconstructionDistribution("identity"))
+ new GaussianReconstructionDistribution(Activation.IDENTITY))
.build())
.pretrain(true).backprop(false).build();
@@ -836,11 +837,11 @@ public void testVaePretrainSimpleCG() {
int nIn = 8;
Nd4j.getRandom().setSeed(12345);
- ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.RMSPROP)
+ ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new RmsProp())
.weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
.addLayer("0", new VariationalAutoencoder.Builder().nIn(8).nOut(10).encoderLayerSizes(12)
.decoderLayerSizes(13).reconstructionDistribution(
- new GaussianReconstructionDistribution("identity"))
+ new GaussianReconstructionDistribution(Activation.IDENTITY))
.build(), "in")
.setOutputs("0").pretrain(true).backprop(false).build();
diff --git a/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/flow/FlowListenerModule.java b/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/flow/FlowListenerModule.java
deleted file mode 100644
index fd4a787d6eb2..000000000000
--- a/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/flow/FlowListenerModule.java
+++ /dev/null
@@ -1,117 +0,0 @@
-package org.deeplearning4j.ui.module.flow;
-
-import lombok.extern.slf4j.Slf4j;
-import org.deeplearning4j.api.storage.Persistable;
-import org.deeplearning4j.api.storage.StatsStorage;
-import org.deeplearning4j.api.storage.StatsStorageEvent;
-import org.deeplearning4j.ui.api.FunctionType;
-import org.deeplearning4j.ui.api.HttpMethod;
-import org.deeplearning4j.ui.api.Route;
-import org.deeplearning4j.ui.api.UIModule;
-import org.deeplearning4j.ui.flow.data.FlowStaticPersistable;
-import org.deeplearning4j.ui.flow.data.FlowUpdatePersistable;
-import play.libs.Json;
-import play.mvc.Result;
-
-import java.util.*;
-
-import static play.mvc.Results.ok;
-
-/**
- * Module for FlowIterationListener
- *
- * @author Alex Black
- */
-@Slf4j
-public class FlowListenerModule implements UIModule {
-
- private static final String TYPE_ID = "FlowListener";
-
- private Map<String, StatsStorage> knownSessionIDs = Collections.synchronizedMap(new LinkedHashMap<>());
-
- @Override
- public List<String> getCallbackTypeIDs() {
- return Collections.singletonList(TYPE_ID);
- }
-
- @Override
- public List<Route> getRoutes() {
- Route r1 = new Route("/flow", HttpMethod.GET, FunctionType.Supplier,
- () -> ok(org.deeplearning4j.ui.views.html.flow.Flow.apply()));
- Route r2 = new Route("/flow/info/:id", HttpMethod.GET, FunctionType.Function, this::getStaticInfo);
- Route r3 = new Route("/flow/state/:id", HttpMethod.GET, FunctionType.Function, this::getUpdate);
- Route r4 = new Route("/flow/listSessions", HttpMethod.GET, FunctionType.Supplier, this::listSessions);
-
- return Arrays.asList(r1, r2, r3, r4);
- }
-
- @Override
- public void reportStorageEvents(Collection<StatsStorageEvent> events) {
- //We should only be getting relevant session IDs...
- for (StatsStorageEvent sse : events) {
- if (!knownSessionIDs.containsKey(sse.getSessionID())) {
- knownSessionIDs.put(sse.getSessionID(), sse.getStatsStorage());
- }
- }
- }
-
- @Override
- public void onAttach(StatsStorage statsStorage) {
- for (String sessionID : statsStorage.listSessionIDs()) {
- for (String typeID : statsStorage.listTypeIDsForSession(sessionID)) {
- if (!TYPE_ID.equals(typeID))
- continue;
- knownSessionIDs.put(sessionID, statsStorage);
- }
- }
- }
-
- @Override
- public void onDetach(StatsStorage statsStorage) {
- for (String s : knownSessionIDs.keySet()) {
- if (statsStorage == knownSessionIDs.get(s)) {
- knownSessionIDs.remove(s);
- }
- }
- }
-
- private Result listSessions() {
- return ok(Json.toJson(knownSessionIDs.keySet()));
- }
-
- private Result getStaticInfo(String sessionID) {
- if (!knownSessionIDs.containsKey(sessionID))
- return ok("Unknown session ID");
- StatsStorage ss = knownSessionIDs.get(sessionID);
-
- List<Persistable> list = ss.getAllStaticInfos(sessionID, TYPE_ID);
- if (list == null || list.size() == 0)
- return ok();
-
- Persistable p = list.get(0);
- if (!(p instanceof FlowStaticPersistable))
- return ok();
-
- FlowStaticPersistable f = (FlowStaticPersistable) p;
-
- return ok(Json.toJson(f.getModelInfo()));
- }
-
- private Result getUpdate(String sessionID) {
- if (!knownSessionIDs.containsKey(sessionID))
- return ok("Unknown session ID");
- StatsStorage ss = knownSessionIDs.get(sessionID);
-
- List<Persistable> list = ss.getLatestUpdateAllWorkers(sessionID, TYPE_ID);
- if (list == null || list.size() == 0)
- return ok();
-
- Persistable p = list.get(0);
- if (!(p instanceof FlowUpdatePersistable))
- return ok();
-
- FlowUpdatePersistable f = (FlowUpdatePersistable) p;
-
- return ok(Json.toJson(f.getModelState()));
- }
-}
diff --git a/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/histogram/HistogramModule.java b/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/histogram/HistogramModule.java
deleted file mode 100644
index 009524616158..000000000000
--- a/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/histogram/HistogramModule.java
+++ /dev/null
@@ -1,207 +0,0 @@
-package org.deeplearning4j.ui.module.histogram;
-
-import lombok.extern.slf4j.Slf4j;
-import org.deeplearning4j.api.storage.Persistable;
-import org.deeplearning4j.api.storage.StatsStorage;
-import org.deeplearning4j.api.storage.StatsStorageEvent;
-import org.deeplearning4j.ui.api.FunctionType;
-import org.deeplearning4j.ui.api.HttpMethod;
-import org.deeplearning4j.ui.api.Route;
-import org.deeplearning4j.ui.api.UIModule;
-import org.deeplearning4j.ui.stats.StatsListener;
-import org.deeplearning4j.ui.stats.api.StatsInitializationReport;
-import org.deeplearning4j.ui.stats.api.StatsReport;
-import org.deeplearning4j.ui.stats.api.StatsType;
-import org.deeplearning4j.ui.stats.api.SummaryType;
-import org.deeplearning4j.ui.weights.beans.CompactModelAndGradient;
-import play.libs.Json;
-import play.mvc.Result;
-import play.mvc.Results;
-
-import java.util.*;
-
-import static play.mvc.Results.ok;
-
-/**
- * Module for the HistogramIterationListener
- *
- * @author Alex Black
- */
-@Slf4j
-public class HistogramModule implements UIModule {
-
- private Map<String, StatsStorage> knownSessionIDs = Collections.synchronizedMap(new LinkedHashMap<>());
-
-
- @Override
- public List<String> getCallbackTypeIDs() {
- return Collections.singletonList(StatsListener.TYPE_ID);
- }
-
- @Override
- public List<Route> getRoutes() {
- Route r = new Route("/weights", HttpMethod.GET, FunctionType.Supplier,
- () -> ok(org.deeplearning4j.ui.views.html.histogram.Histogram.apply()));
- Route r2 = new Route("/weights/listSessions", HttpMethod.GET, FunctionType.Supplier,
- () -> ok(Json.toJson(knownSessionIDs.keySet())));
- Route r3 = new Route("/weights/updated/:sid", HttpMethod.GET, FunctionType.Function, this::getLastUpdateTime);
- Route r4 = new Route("/weights/data/:sid", HttpMethod.GET, FunctionType.Function, this::processRequest);
-
- return Arrays.asList(r, r2, r3, r4);
- }
-
- @Override
- public void reportStorageEvents(Collection<StatsStorageEvent> events) {
- log.trace("Received events: {}", events);
-
- //We should only be getting relevant session IDs...
- for (StatsStorageEvent sse : events) {
- if (!knownSessionIDs.containsKey(sse.getSessionID())) {
- knownSessionIDs.put(sse.getSessionID(), sse.getStatsStorage());
- }
- }
- }
-
- @Override
- public void onAttach(StatsStorage statsStorage) {
- for (String sessionID : statsStorage.listSessionIDs()) {
- for (String typeID : statsStorage.listTypeIDsForSession(sessionID)) {
- if (!StatsListener.TYPE_ID.equals(typeID))
- continue;
- knownSessionIDs.put(sessionID, statsStorage);
- }
- }
- }
-
- @Override
- public void onDetach(StatsStorage statsStorage) {
- for (String sessionID : statsStorage.listSessionIDs()) {
- knownSessionIDs.remove(sessionID);
- }
- }
-
- private Result getLastUpdateTime(String sessionID) {
- return Results.ok(Json.toJson(System.currentTimeMillis()));
- }
-
- private Result processRequest(String sessionId) {
- //TODO cache the relevant info and update, rather than querying StatsStorage and building from scratch each time
-
- StatsStorage ss = knownSessionIDs.get(sessionId);
- if (ss == null) {
- return Results.notFound("Unknown session ID: " + sessionId);
- }
-
- List<String> workerIDs = ss.listWorkerIDsForSession(sessionId);
-
- //TODO checks
- StatsInitializationReport initReport = (StatsInitializationReport) ss.getStaticInfo(sessionId,
- StatsListener.TYPE_ID, workerIDs.get(0));
- if (initReport == null)
- return Results.ok(Json.toJson(Collections.EMPTY_MAP));
-
- String[] paramNames = initReport.getModelParamNames();
- //Infer layer names from param names...
- Set<String> layerNameSet = new LinkedHashSet<>();
- for (String s : paramNames) {
- String[] split = s.split("_");
- if (!layerNameSet.contains(split[0])) {
- layerNameSet.add(split[0]);
- }
- }
- List<String> layerNameList = new ArrayList<>(layerNameSet);
-
- List<Persistable> list = ss.getAllUpdatesAfter(sessionId, StatsListener.TYPE_ID, workerIDs.get(0), 0);
- Collections.sort(list, (a, b) -> Long.compare(a.getTimeStamp(), b.getTimeStamp()));
-
- List<Double> scoreList = new ArrayList<>(list.size());
- List