diff --git a/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIterator.java b/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIterator.java index c96285b373c6..82d5ca3feff9 100644 --- a/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIterator.java +++ b/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIterator.java @@ -31,7 +31,6 @@ import org.datavec.api.util.ndarray.RecordConverter; import org.datavec.api.writable.NDArrayWritable; import org.datavec.api.writable.Writable; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.datasets.datavec.exception.ZeroLengthSequenceException; import org.deeplearning4j.exception.DL4JException; import org.nd4j.linalg.api.ndarray.INDArray; @@ -41,6 +40,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; import java.io.IOException; import java.io.Serializable; diff --git a/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/iterator/impl/TinyImageNetDataSetIterator.java b/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/iterator/impl/TinyImageNetDataSetIterator.java index 6a8d41398927..9dda65428e75 100644 --- a/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/iterator/impl/TinyImageNetDataSetIterator.java +++ b/deeplearning4j-core/src/main/java/org/deeplearning4j/datasets/iterator/impl/TinyImageNetDataSetIterator.java @@ -19,21 +19,12 @@ package org.deeplearning4j.datasets.iterator.impl; import lombok.Getter; -import org.datavec.api.io.labels.ParentPathLabelGenerator; -import org.datavec.api.io.labels.PathLabelGenerator; -import org.datavec.api.io.labels.PatternPathLabelGenerator; -import org.datavec.image.recordreader.ImageRecordReader; import org.datavec.image.transform.ImageTransform; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; -import org.deeplearning4j.datasets.fetchers.*; -import org.deeplearning4j.datasets.iterator.BaseDatasetIterator; +import org.deeplearning4j.datasets.fetchers.DataSetType; +import org.deeplearning4j.datasets.fetchers.TinyImageNetFetcher; import org.nd4j.linalg.dataset.api.DataSetPreProcessor; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; - /** * Tiny ImageNet is a subset of the ImageNet database. TinyImageNet is the default course challenge for CS231n at Stanford University. 
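The TinyImageNetDataSetIterator hunk above only trims unused imports, so its public usage is unchanged. As a hedged illustration of how such an iterator is typically consumed, here is a minimal sketch; the (batchSize, DataSetType) constructor, the batch size of 32, and the class name TinyImageNetUsageSketch are assumptions for illustration and are not shown by this hunk.

import org.deeplearning4j.datasets.fetchers.DataSetType;
import org.deeplearning4j.datasets.iterator.impl.TinyImageNetDataSetIterator;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;

public class TinyImageNetUsageSketch {
    public static void main(String[] args) {
        // Assumed constructor: batch size plus train/test selector (verify against the class).
        DataSetIterator train = new TinyImageNetDataSetIterator(32, DataSetType.TRAIN);
        while (train.hasNext()) {
            DataSet batch = train.next();
            // Each batch holds image features and one-hot labels for the 200 Tiny ImageNet classes.
            System.out.println(java.util.Arrays.toString(batch.getFeatures().shape()));
        }
    }
}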
diff --git a/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java b/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java index ab65e9b05aae..c59db1163db6 100644 --- a/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java +++ b/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java @@ -23,7 +23,6 @@ import com.google.common.util.concurrent.AtomicDouble; import lombok.extern.slf4j.Slf4j; import org.apache.commons.math3.util.FastMath; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.clustering.sptree.DataPoint; import org.deeplearning4j.clustering.sptree.SpTree; import org.deeplearning4j.clustering.vptree.VPTree; @@ -39,6 +38,7 @@ import org.nd4j.linalg.indexing.conditions.Conditions; import org.nd4j.linalg.indexing.functions.Value; import org.nd4j.linalg.learning.legacy.AdaGrad; +import org.nd4j.linalg.primitives.Pair; import java.io.BufferedWriter; import java.io.File; @@ -671,11 +671,6 @@ public void setBackpropGradientsViewArray(INDArray gradients) { throw new UnsupportedOperationException(); } - @Override - public void applyLearningRateScoreDecay() { - throw new UnsupportedOperationException("Not yet implemented"); - } - @Override public void fit(INDArray data) { this.x = data; diff --git a/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/Tsne.java b/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/Tsne.java index b5581e7f0a29..5fd36bddb617 100644 --- a/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/Tsne.java +++ b/deeplearning4j-core/src/main/java/org/deeplearning4j/plot/Tsne.java @@ -2,7 +2,6 @@ import com.google.common.primitives.Ints; import org.apache.commons.math3.util.FastMath; -import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dimensionalityreduction.PCA; import org.nd4j.linalg.factory.Nd4j; @@ -12,6 +11,7 @@ import org.nd4j.linalg.indexing.conditions.Conditions; import org.nd4j.linalg.indexing.functions.Value; import org.nd4j.linalg.learning.legacy.AdaGrad; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.util.ArrayUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/TestUtils.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/TestUtils.java new file mode 100644 index 000000000000..bde9eb28a806 --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/TestUtils.java @@ -0,0 +1,55 @@ +package org.deeplearning4j; + +import org.deeplearning4j.nn.graph.ComputationGraph; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.util.ModelSerializer; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import static org.junit.Assert.assertEquals; + +public class TestUtils { + + public static MultiLayerNetwork testModelSerialization(MultiLayerNetwork net){ + + try { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ModelSerializer.writeModel(net, baos, true); + byte[] bytes = baos.toByteArray(); + + ByteArrayInputStream bais = new ByteArrayInputStream(bytes); + MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); + + assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); + assertEquals(net.params(), restored.params()); + + return restored; + } catch (IOException e){ + //Should never happen + throw new 
RuntimeException(e); + } + } + + public static ComputationGraph testModelSerialization(ComputationGraph net){ + + try { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ModelSerializer.writeModel(net, baos, true); + byte[] bytes = baos.toByteArray(); + + ByteArrayInputStream bais = new ByteArrayInputStream(bytes); + ComputationGraph restored = ModelSerializer.restoreComputationGraph(bais, true); + + assertEquals(net.getConfiguration(), restored.getConfiguration()); + assertEquals(net.params(), restored.params()); + + return restored; + } catch (IOException e){ + //Should never happen + throw new RuntimeException(e); + } + } + +} diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIteratorTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIteratorTest.java index 8ad663cfba6d..773b47184c1c 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIteratorTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIteratorTest.java @@ -1,10 +1,10 @@ package org.deeplearning4j.datasets.iterator; import org.apache.commons.lang3.RandomUtils; -import org.nd4j.linalg.primitives.Pair; import org.junit.Test; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; +import org.nd4j.linalg.primitives.Pair; import java.util.Iterator; import java.util.concurrent.atomic.AtomicInteger; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/MultipleEpochsIteratorTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/MultipleEpochsIteratorTest.java index 44a1b2ce2dc5..9cfe3f3b783e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/MultipleEpochsIteratorTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/iterator/MultipleEpochsIteratorTest.java @@ -98,7 +98,7 @@ public void testLoadBatchDataSet() throws Exception { public void testCifarDataSetIteratorReset() { int epochs = 2; Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .weightInit(WeightInit.XAVIER).seed(12345L).list() .layer(0, new DenseLayer.Builder().nIn(400).nOut(50).activation(Activation.RELU).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java index 291af43ba330..c8763ccea273 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java @@ -15,7 +15,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -29,6 +28,8 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import 
org.nd4j.linalg.learning.config.Nesterovs; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -47,7 +48,7 @@ public class TestEarlyStopping { public void testEarlyStoppingIris() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .pretrain(false).backprop(true).build(); @@ -90,7 +91,7 @@ public void testEarlyStoppingIris() { public void testEarlyStoppingEveryNEpoch() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.01)).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .pretrain(false).backprop(true).build(); @@ -118,7 +119,7 @@ public void testEarlyStoppingEveryNEpoch() { public void testEarlyStoppingIrisMultiEpoch() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .pretrain(false).backprop(true).build(); @@ -166,7 +167,7 @@ public void testBadTuning() { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(5.0) //Intentionally huge LR + .updater(new Sgd(5.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -205,7 +206,7 @@ public void testTimeTermination() { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(1e-6).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .pretrain(false).backprop(true).build(); @@ -247,7 +248,7 @@ public void testNoImprovementNEpochsTermination() { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(0.0).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .pretrain(false).backprop(true).build(); @@ -285,8 +286,8 @@ public void testMinImprovementNEpochsTermination() { Random rng = new Random(123); 
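// The TestEarlyStopping hunks around this point migrate from the deprecated Updater enum with
// separate .learningRate(..)/.momentum(..) calls to IUpdater instances, where the learning rate
// (and momentum) are constructor arguments. A minimal sketch of that pattern, reusing values from
// the hunks above; the 4-in/3-out layer shape is just the Iris-sized example these tests use:
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;

class UpdaterMigrationSketch {

    static MultiLayerConfiguration sgdConfig() {
        // was: .updater(Updater.SGD) with a separate .learningRate(..) call
        return new NeuralNetConfiguration.Builder()
                .updater(new Sgd(0.001))
                .list()
                .layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
                        .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                .build();
    }

    static MultiLayerConfiguration nesterovsConfig() {
        // was: .learningRate(0.0).updater(Updater.NESTEROVS).momentum(0.9)
        return new NeuralNetConfiguration.Builder()
                .updater(new Nesterovs(0.0, 0.9))
                .list()
                .layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
                        .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                .build();
    }
}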
Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123).iterations(10) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.0) - .updater(Updater.NESTEROVS).momentum(0.9).list() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .updater(new Nesterovs(0.0,0.9)).list() .layer(0, new DenseLayer.Builder().nIn(1).nOut(20) .weightInit(WeightInit.XAVIER).activation( Activation.TANH) @@ -333,7 +334,7 @@ public void testMinImprovementNEpochsTermination() { public void testEarlyStoppingGetBestModel() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .pretrain(false).backprop(true).build(); @@ -364,14 +365,14 @@ public void testEarlyStoppingGetBestModel() { assertEquals(net.conf().getOptimizationAlgo(), mln.conf().getOptimizationAlgo()); BaseLayer bl = (BaseLayer) net.conf().getLayer(); assertEquals(bl.getActivationFn().toString(), ((BaseLayer) mln.conf().getLayer()).getActivationFn().toString()); - assertEquals(bl.getUpdater(), ((BaseLayer) mln.conf().getLayer()).getUpdater()); + assertEquals(bl.getIUpdater(), ((BaseLayer) mln.conf().getLayer()).getIUpdater()); } @Test public void testListeners() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .pretrain(false).backprop(true).build(); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java index 90d7dc9902ff..b700e0b14a18 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java @@ -31,7 +31,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.weights.WeightInit; @@ -40,6 +39,7 @@ import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,7 +55,7 @@ public class TestEarlyStoppingCompGraph { public void testEarlyStoppingIris() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") + .updater(new 
Sgd(0.001)).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").pretrain(false).backprop(true).build(); @@ -98,7 +98,7 @@ public void testBadTuning() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(5.0) //Intentionally huge LR + .updater(new Sgd(5.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") @@ -134,7 +134,7 @@ public void testTimeTermination() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(1e-6).weightInit(WeightInit.XAVIER).graphBuilder() + .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") @@ -175,7 +175,7 @@ public void testNoImprovementNEpochsTermination() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(0.0).weightInit(WeightInit.XAVIER).graphBuilder() + .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") @@ -209,7 +209,7 @@ public void testNoImprovementNEpochsTermination() { public void testListeners() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") + .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").pretrain(false).backprop(true).build(); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/eval/EvalTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/eval/EvalTest.java index 12f7c58a15df..40e0419ac882 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/eval/EvalTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/eval/EvalTest.java @@ -29,7 +29,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -47,6 +46,7 @@ import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.io.ClassPathResource; +import org.nd4j.linalg.learning.config.Sgd; import 
org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.util.FeatureUtil; @@ -179,7 +179,7 @@ public void testIris() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(1).seed(42) - .learningRate(1e-6).list() + .updater(new Sgd(1e-6)).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(2).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( @@ -623,8 +623,8 @@ public void testEvaluationWithMetaData() throws Exception { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).iterations(1) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) - .learningRate(0.1).list() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)) + .list() .layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(4).nOut(3).build()) .pretrain(false).backprop(true).build(); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java index ceabfd54382b..3db596ce5f2e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java @@ -5,7 +5,6 @@ import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -23,6 +22,7 @@ import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization; import org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.Random; @@ -54,8 +54,8 @@ public void testGradient2dSimple() { INDArray labels = ds.getLabels(); MultiLayerConfiguration.Builder builder = - new NeuralNetConfiguration.Builder().learningRate(1.0).regularization(false) - .updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION) + new NeuralNetConfiguration.Builder().updater(new NoOp()) + .seed(12345L).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(3) .activation(Activation.IDENTITY).build()) @@ -93,8 +93,8 @@ public void testGradientCnnSimple() { labels.putScalar(i, r.nextInt(nOut), 1.0); } - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0) - .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).seed(12345L).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 2)).list() .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2) .activation(Activation.IDENTITY).build()) @@ -157,9 +157,9 @@ public void testGradientBNWithCNNandSubsampling() { Activation 
outputActivation = outputActivations[i]; MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(12345) - .regularization(l1vals[j] > 0 || l2vals[j] > 0).l1(l1vals[j]).l2(l2vals[j]) + .l2(l2vals[j]) .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT) - .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION) + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-2, 2)).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3) .activation(afn).build()) @@ -256,10 +256,9 @@ public void testGradientDense() { MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() - .regularization(l1vals[j] > 0 || l2vals[j] > 0).l1(l1vals[j]) .l2(l2vals[j]) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) - .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION) + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-2, 2)).seed(12345L).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(4) .activation(afn).build()) @@ -325,8 +324,8 @@ public void testGradient2dFixedGammaBeta() { INDArray input = ds.getFeatureMatrix(); INDArray labels = ds.getLabels(); - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0) - .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp()) + .seed(12345L).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.IDENTITY).build()) .layer(1, new BatchNormalization.Builder().lockGammaBeta(true).gamma(2.0).beta(0.5).nOut(3) @@ -364,8 +363,8 @@ public void testGradientCnnFixedGammaBeta() { labels.putScalar(i, r.nextInt(nOut), 1.0); } - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0) - .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp()) + .seed(12345L).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 2)).list() .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2) .activation(Activation.IDENTITY).build()) @@ -400,8 +399,8 @@ public void testBatchNormCompGraphSimple() { int minibatchSize = 3; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).updater(Updater.NONE) - .weightInit(WeightInit.XAVIER).regularization(false).graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).updater(new NoOp()) + .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .setInputTypes(InputType.convolutional(height, width, channels)) .addLayer("bn", new BatchNormalization.Builder().build(), "in") .addLayer("out", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) @@ -465,9 +464,8 @@ public void testGradientBNWithCNNandSubsamplingCompGraph() { Activation outputActivation = outputActivations[i]; ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) - .regularization(l1vals[j] > 0 || l2vals[j] > 0).l1(l1vals[j]).l2(l2vals[j]) .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT) - .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION) + .updater(new 
NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-2, 2)).seed(12345L).graphBuilder() .addInputs("in") .addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java index c51d1782c9b1..5c83a0f46176 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java @@ -3,7 +3,6 @@ import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; @@ -15,6 +14,7 @@ import org.nd4j.linalg.api.buffer.util.DataTypeUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import static org.junit.Assert.assertEquals; @@ -69,7 +69,7 @@ public void testCnn1DWithZeroPadding1D() { } MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .learningRate(1.0).updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION) + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)).convolutionMode(ConvolutionMode.Same).list() .layer(new Convolution1DLayer.Builder().activation(afn).kernelSize(kernel) .stride(stride).padding(padding).nIn(convNIn).nOut(convNOut1) @@ -145,8 +145,8 @@ public void testCnn1DWithSubsampling1D() { } } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false) - .learningRate(1.0).updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)).convolutionMode(ConvolutionMode.Same).list() .layer(0, new Convolution1DLayer.Builder().activation(afn).kernelSize(kernel) .stride(stride).padding(padding).nIn(convNIn).nOut(convNOut1) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java index 2c2165e73148..1263ed9c7adc 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java @@ -5,7 +5,6 @@ import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; @@ -18,6 +17,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.Arrays; @@ -44,26 +44,26 @@ public void testGradientCNNMLN() { 
// (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) - String[] activFns = {"sigmoid", "tanh"}; + Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; - String[] outputActivations = {"softmax", "tanh"}; //i.e., lossFunctions[i] used with outputActivations[i] here + Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here DataSet ds = new IrisDataSetIterator(150, 150).next(); ds.normalizeZeroMeanZeroUnitVariance(); INDArray input = ds.getFeatureMatrix(); INDArray labels = ds.getLabels(); - for (String afn : activFns) { + for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i < lossFunctions.length; i++) { LossFunctions.LossFunction lf = lossFunctions[i]; - String outputActivation = outputActivations[i]; + Activation outputActivation = outputActivations[i]; - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().regularization(false) - .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(Updater.NONE) + MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() + .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) .weightInit(WeightInit.XAVIER).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn).build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build()) @@ -117,12 +117,12 @@ public void testGradientCNNL1L2MLN() { // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) - String[] activFns = {"sigmoid", "tanh"}; + Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; - String[] outputActivations = {"softmax", "tanh"}; //i.e., lossFunctions[i] used with outputActivations[i] here + Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here DataSet ds = new IrisDataSetIterator(150, 150).next(); ds.normalizeZeroMeanZeroUnitVariance(); @@ -135,25 +135,25 @@ public void testGradientCNNL1L2MLN() { double[] biasL2 = {0.0, 0.0, 0.0, 0.2}; double[] biasL1 = {0.0, 0.0, 0.6, 0.0}; - for (String afn : activFns) { + for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i < lossFunctions.length; i++) { for (int k = 0; k < l2vals.length; k++) { LossFunctions.LossFunction lf = lossFunctions[i]; - String outputActivation = outputActivations[i]; + Activation outputActivation = outputActivations[i]; double l2 = l2vals[k]; double l1 = l1vals[k]; MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() - .regularization(true).l2(l2).l1(l1).l2Bias(biasL2[k]).l1Bias(biasL1[k]) + .l2(l2).l1(l1).l2Bias(biasL2[k]).l1Bias(biasL1[k]) .optimizationAlgo( 
OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(new int[] {1, 1}).nIn(1).nOut(6) .weightInit(WeightInit.XAVIER).activation(afn) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3) - .weightInit(WeightInit.XAVIER).updater(Updater.NONE).build()) + .weightInit(WeightInit.XAVIER).updater(new NoOp()).build()) .pretrain(false).backprop(true) .setInputType(InputType.convolutionalFlat(1, 4, 1)); @@ -230,8 +230,8 @@ public void testCnnWithUpsampling() { } MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0) - .updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION) + new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) .list().layer(new ConvolutionLayer.Builder(kernel, stride, padding).nIn(inputDepth) @@ -281,12 +281,12 @@ public void testCnnWithSubsampling() { int[] padding = {0, 0}; int pnorm = 2; - String[] activations = {"sigmoid", "tanh"}; + Activation[] activations = {Activation.SIGMOID, Activation.TANH}; SubsamplingLayer.PoolingType[] poolingTypes = new SubsamplingLayer.PoolingType[] {SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM}; - for (String afn : activations) { + for (Activation afn : activations) { for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { for (int minibatchSize : minibatchSizes) { INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); @@ -296,8 +296,8 @@ public void testCnnWithSubsampling() { } MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0) - .updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION) + new NeuralNetConfiguration.Builder().updater(new NoOp()) + .weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) .list().layer(0, new ConvolutionLayer.Builder(kernel, @@ -348,12 +348,12 @@ public void testCnnWithSubsamplingV2() { int[] padding = {0, 0}; int pNorm = 3; - String[] activations = {"sigmoid", "tanh"}; + Activation[] activations = {Activation.SIGMOID, Activation.TANH}; SubsamplingLayer.PoolingType[] poolingTypes = new SubsamplingLayer.PoolingType[] {SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM}; - for (String afn : activations) { + for (Activation afn : activations) { for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { for (int minibatchSize : minibatchSizes) { INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); @@ -363,8 +363,7 @@ public void testCnnWithSubsamplingV2() { } MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0) - .updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION) + new NeuralNetConfiguration.Builder().updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) .list().layer(0, new ConvolutionLayer.Builder(kernel, @@ -407,14 +406,14 @@ public void testCnnMultiLayer() { int height = 5; int[] inputDepths = {1, 2, 4}; - String[] activations = {"sigmoid", "tanh"}; + Activation[] activations = {Activation.SIGMOID, Activation.TANH}; SubsamplingLayer.PoolingType[] poolingTypes = new SubsamplingLayer.PoolingType[] { SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG}; Nd4j.getRandom().setSeed(12345); for (int inputDepth : 
inputDepths) { - for (String afn : activations) { + for (Activation afn : activations) { for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { for (int minibatchSize : minibatchSizes) { INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); @@ -423,8 +422,8 @@ public void testCnnMultiLayer() { labels.putScalar(new int[] {i, i % nOut}, 1.0); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) - .regularization(false).learningRate(1.0).updater(Updater.SGD).activation(afn) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new NoOp()) + .activation(afn) .list() .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1) .padding(0, 0).nIn(inputDepth).nOut(2).build())//output: (5-2+0)/1+1 = 4 @@ -486,7 +485,7 @@ public void testCnnSamePaddingMode() { } MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) - .regularization(false).learningRate(1.0).updater(Updater.SGD) + .updater(new NoOp()) .activation(Activation.TANH).convolutionMode(ConvolutionMode.Same).list() .layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k) .stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build()) @@ -553,7 +552,7 @@ public void testCnnSamePaddingModeStrided() { .stride(stride, stride).padding(0, 0).build(); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) - .regularization(false).learningRate(1.0).updater(Updater.SGD) + .updater(new NoOp()) .activation(Activation.TANH).convolutionMode(ConvolutionMode.Same).list() .layer(0, convFirst ? convLayer : poolLayer) .layer(1, convFirst ? poolLayer : convLayer) @@ -613,8 +612,7 @@ public void testCnnZeroPaddingLayer() { for (int[] zeroPad : zeroPadLayer) { MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0) - .updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION) + new NeuralNetConfiguration.Builder().updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)).list() .layer(0, new ConvolutionLayer.Builder(kernel, stride, padding) .nIn(inputDepth).nOut(3).build())//output: (6-2+0)/1+1 = 5 @@ -688,7 +686,7 @@ public void testCnnDilated() { } NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345) - .learningRate(1.0).updater(Updater.SGD) + .updater(new NoOp()) .activation(Activation.TANH).convolutionMode(cm).list() .layer(new ConvolutionLayer.Builder().name("layer 0") .kernelSize(k, k) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java index 275a36bd6185..ce7280ce6739 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java @@ -3,7 +3,6 @@ import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; @@ -15,6 +14,7 @@ import org.nd4j.linalg.api.buffer.util.DataTypeUtil; import org.nd4j.linalg.api.ndarray.INDArray; import 
org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.Random; @@ -53,8 +53,8 @@ public void testLSTMGlobalPoolingBasicMultiLayer() { for (int miniBatchSize : minibatchSizes) { for (PoolingType pt : poolingTypes) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false) - .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) .build()) @@ -116,8 +116,8 @@ public void testCnnGlobalPoolingBasicMultiLayer() { for (int miniBatchSize : minibatchSizes) { for (PoolingType pt : poolingTypes) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false) - .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(layerDepth) .build()) @@ -171,8 +171,8 @@ public void testLSTMWithMasking() { for (PoolingType pt : poolingTypes) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false) - .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) .build()) @@ -255,8 +255,8 @@ public void testCnnGlobalPoolingMasking() { stride = new int[] {inputH, 1}; } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false) - .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1.0)).convolutionMode(ConvolutionMode.Same) .seed(12345L).list() .layer(0, new ConvolutionLayer.Builder().kernelSize(kernel).stride(stride) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java index d6390e6b526f..2d1af110e3c3 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java @@ -4,7 +4,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -20,6 +19,7 @@ import org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler; import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; import 
org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; import java.util.Random; @@ -48,11 +48,11 @@ public void testGradientMLP2LayerIrisSimple() { // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) - String[] activFns = {"sigmoid", "tanh", "softplus"}; //activation functions such as relu and hardtanh: may randomly fail due to discontinuities + Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE}; - String[] outputActivations = {"softmax", "tanh"}; //i.e., lossFunctions[i] used with outputActivations[i] here + Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here DataNormalization scaler = new NormalizerMinMaxScaler(); DataSetIterator iter = new IrisDataSetIterator(150, 150); scaler.fit(iter); @@ -62,25 +62,24 @@ public void testGradientMLP2LayerIrisSimple() { INDArray input = ds.getFeatureMatrix(); INDArray labels = ds.getLabels(); - for (String afn : activFns) { + for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i < lossFunctions.length; i++) { LossFunction lf = lossFunctions[i]; - String outputActivation = outputActivations[i]; + Activation outputActivation = outputActivations[i]; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false) - .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) .seed(12345L) .list().layer(0, new DenseLayer.Builder().nIn(4).nOut(3) .weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) - .activation(afn).updater( - Updater.SGD) + .activation(afn) .build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nIn(3).nOut(3) .weightInit(WeightInit.DISTRIBUTION) - .dist(new NormalDistribution(0, 1)).updater(Updater.SGD).build()) + .dist(new NormalDistribution(0, 1)).build()) .pretrain(false).backprop(true).build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); @@ -128,11 +127,11 @@ public void testGradientMLP2LayerIrisL1L2Simple() { //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied //Need to run gradient through updater, so that L2 can be applied - String[] activFns = {"sigmoid", "tanh"}; + Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE}; - String[] outputActivations = {"softmax", "tanh"}; //i.e., lossFunctions[i] used with outputActivations[i] here + Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here DataNormalization scaler = new NormalizerMinMaxScaler(); DataSetIterator iter = new IrisDataSetIterator(150, 150); @@ -149,17 +148,17 @@ public void testGradientMLP2LayerIrisL1L2Simple() { double[] biasL2 = {0.0, 0.0, 0.0, 0.2}; double[] biasL1 = {0.0, 0.0, 0.6, 0.0}; - for (String afn : activFns) { + for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i 
< lossFunctions.length; i++) { for (int k = 0; k < l2vals.length; k++) { LossFunction lf = lossFunctions[i]; - String outputActivation = outputActivations[i]; + Activation outputActivation = outputActivations[i]; double l2 = l2vals[k]; double l1 = l1vals[k]; MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().regularization(true).l2(l2).l1(l1) + new NeuralNetConfiguration.Builder().l2(l2).l1(l1) .l2Bias(biasL2[k]).l1Bias(biasL1[k]) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(12345L) @@ -168,12 +167,12 @@ public void testGradientMLP2LayerIrisL1L2Simple() { .weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE) + .updater(new NoOp()) .activation(afn).build()) .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3) .weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE) + .updater(new NoOp()) .activation(outputActivation).build()) .pretrain(false).backprop(true).build(); @@ -230,17 +229,17 @@ public void testEmbeddingLayerSimple() { labels.putScalar(new int[] {i, r.nextInt(3)}, 1.0); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.1) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.1) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345L) .list().layer(0, new EmbeddingLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) .dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).activation( + .updater(new NoOp()).activation( Activation.TANH) .build()) .layer(1, new OutputLayer.Builder(LossFunction.MCXENT).nIn(3).nOut(3) .weightInit(WeightInit.XAVIER).dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).activation(Activation.SOFTMAX).build()) + .updater(new NoOp()).activation(Activation.SOFTMAX).build()) .pretrain(false).backprop(true).build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); @@ -268,7 +267,7 @@ public void testRbm() { boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunction[] lossFunctions = {LossFunction.MSE, LossFunction.KL_DIVERGENCE}; - String[] outputActivations = {"softmax", "sigmoid"}; //i.e., lossFunctions[i] used with outputActivations[i] here + Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here DataNormalization scaler = new NormalizerMinMaxScaler(); DataSetIterator iter = new IrisDataSetIterator(150, 150); @@ -287,21 +286,20 @@ public void testRbm() { for (int i = 0; i < lossFunctions.length; i++) { for (int k = 0; k < l2vals.length; k++) { LossFunction lf = lossFunctions[i]; - String outputActivation = outputActivations[i]; + Activation outputActivation = outputActivations[i]; double l2 = l2vals[k]; double l1 = l1vals[k]; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(l2) - .l1(l1).learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(l2).l1(l1).updater(new NoOp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .updater(new NoOp()) .seed(12345L) .list().layer(0, new RBM.Builder(hidunit, RBM.VisibleUnit.BINARY).nIn(4).nOut(3) - .weightInit(WeightInit.UNIFORM).updater( - Updater.SGD) + .weightInit(WeightInit.UNIFORM) .build()) .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3) - .weightInit(WeightInit.XAVIER).updater(Updater.SGD) + .weightInit(WeightInit.XAVIER) 
.activation(outputActivation).build()) .pretrain(false).backprop(true).build(); @@ -354,11 +352,11 @@ public void testAutoEncoder() { //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied //Need to run gradient through updater, so that L2 can be applied - String[] activFns = {"sigmoid", "tanh"}; + Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE}; - String[] outputActivations = {"softmax", "tanh"}; //i.e., lossFunctions[i] used with outputActivations[i] here + Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; DataNormalization scaler = new NormalizerMinMaxScaler(); DataSetIterator iter = new IrisDataSetIterator(150, 150); @@ -375,22 +373,23 @@ public void testAutoEncoder() { double[] l2vals = {0.2, 0.0, 0.2}; double[] l1vals = {0.0, 0.3, 0.3}; //i.e., use l2vals[i] with l1vals[i] - for (String afn : activFns) { + for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i < lossFunctions.length; i++) { for (int k = 0; k < l2vals.length; k++) { LossFunction lf = lossFunctions[i]; - String outputActivation = outputActivations[i]; + Activation outputActivation = outputActivations[i]; double l2 = l2vals[k]; double l1 = l1vals[k]; Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().regularization(true).learningRate(1.0) + new NeuralNetConfiguration.Builder() + .updater(new NoOp()) .l2(l2).l1(l1) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(12345L).weightInit(WeightInit.DISTRIBUTION) - .dist(new NormalDistribution(0, 1)).updater(Updater.SGD) + .dist(new NormalDistribution(0, 1)) .list().layer(0, new AutoEncoder.Builder().nIn(4).nOut(3) .activation(afn).build()) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java index 9f2f27a0e363..48184b9f502b 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java @@ -5,7 +5,6 @@ import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.GaussianDistribution; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; @@ -28,6 +27,7 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.Arrays; @@ -54,8 +54,8 @@ public void testBasicIris() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE) - .learningRate(1.0).graphBuilder().addInputs("input") + 
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(new NoOp()) + .graphBuilder().addInputs("input") .addLayer("firstLayer", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build(), "input") @@ -99,8 +99,8 @@ public void testBasicIrisWithMerging() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE) - .learningRate(1.0).graphBuilder().addInputs("input") + .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(new NoOp()) + .graphBuilder().addInputs("input") .addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build(), "input") .addLayer("l2", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build(), @@ -156,7 +156,7 @@ public void testBasicIrisWithElementWiseNode() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).learningRate(1.0).graphBuilder().addInputs("input") + .updater(new NoOp()).graphBuilder().addInputs("input") .addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build(), "input") .addLayer("l2", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.SIGMOID) @@ -213,7 +213,7 @@ public void testBasicIrisWithElementWiseNodeInputSizeGreaterThanTwo() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).learningRate(1.0).graphBuilder().addInputs("input") + .updater(new NoOp()).graphBuilder().addInputs("input") .addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build(), "input") .addLayer("l2", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.SIGMOID) @@ -266,8 +266,8 @@ public void testCnnDepthMerge() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.1)).updater(Updater.NONE) - .learningRate(1.0).graphBuilder().addInputs("input") + .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.1)) + .updater(new NoOp()).graphBuilder().addInputs("input") .addLayer("l1", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0) .nIn(2).nOut(2).activation(Activation.TANH).build(), "input") .addLayer("l2", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0) @@ -313,7 +313,7 @@ public void testLSTMWithMerging() { new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0.2, 0.6)) - .updater(Updater.NONE).learningRate(1.0).graphBuilder().addInputs("input") + .updater(new NoOp()).graphBuilder().addInputs("input") .setOutputs("out") .addLayer("lstm1", new GravesLSTM.Builder().nIn(3).nOut(4) @@ -371,8 +371,8 @@ public void testLSTMWithSubset() { Nd4j.getRandom().setSeed(1234); 
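// Alongside the updater change, the gradient-check hunks above replace string activation names
// ("sigmoid", "tanh", "softmax") with the Activation enum. A minimal sketch of the enum-based
// style used after this patch; layer sizes are illustrative only:
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;

class ActivationEnumSketch {

    static void buildAll() {
        // Iterate the same activation pairs the tests now loop over as enums rather than strings.
        for (Activation afn : new Activation[] {Activation.SIGMOID, Activation.TANH}) {
            new NeuralNetConfiguration.Builder()
                    .updater(new NoOp()) // gradient checks want no updater-side change to the step
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(afn).build())
                    .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                            .activation(Activation.SOFTMAX).nIn(3).nOut(3).build())
                    .build();
        }
    }
}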
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(1234) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE) - .learningRate(1.0).graphBuilder().addInputs("input").setOutputs("out") + .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) + .updater(new NoOp()).graphBuilder().addInputs("input").setOutputs("out") .addLayer("lstm1", new GravesLSTM.Builder().nIn(3).nOut(8).activation(Activation.TANH).build(), "input") .addVertex("subset", new SubsetVertex(0, 3), "lstm1") @@ -412,8 +412,8 @@ public void testLSTMWithLastTimeStepVertex() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE) - .learningRate(1.0).graphBuilder().addInputs("input").setOutputs("out") + .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) + .updater(new NoOp()).graphBuilder().addInputs("input").setOutputs("out") .addLayer("lstm1", new GravesLSTM.Builder().nIn(3).nOut(4).activation(Activation.TANH).build(), "input") .addVertex("lastTS", new LastTimeStepVertex("input"), "lstm1") @@ -465,7 +465,7 @@ public void testLSTMWithDuplicateToTimeSeries() { new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).learningRate(1.0).graphBuilder() + .updater(new NoOp()).graphBuilder() .addInputs("input1", "input2").setOutputs("out") .addLayer("lstm1", new GravesLSTM.Builder().nIn(3).nOut(4) @@ -517,8 +517,8 @@ public void testMultipleInputsLayer() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE) - .learningRate(1.0).activation(Activation.TANH).graphBuilder().addInputs("i0", "i1", "i2") + .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) + .updater(new NoOp()).activation(Activation.TANH).graphBuilder().addInputs("i0", "i1", "i2") .addLayer("d0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i0") .addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i1") .addLayer("d2", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i2") @@ -559,8 +559,8 @@ public void testMultipleOutputsLayer() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE) - .learningRate(1.0).activation(Activation.TANH).graphBuilder().addInputs("i0") + .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) + .updater(new NoOp()).activation(Activation.TANH).graphBuilder().addInputs("i0") .addLayer("d0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i0") .addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "d0") .addLayer("d2", new DenseLayer.Builder().nIn(2).nOut(2).build(), "d0") @@ -598,8 +598,8 @@ public void testMultipleOutputsMergeVertex() { Nd4j.getRandom().setSeed(12345); 
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE) - .learningRate(1.0).activation(Activation.TANH).graphBuilder().addInputs("i0", "i1", "i2") + .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) + .updater(new NoOp()).activation(Activation.TANH).graphBuilder().addInputs("i0", "i1", "i2") .addLayer("d0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i0") .addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i1") .addLayer("d2", new DenseLayer.Builder().nIn(2).nOut(2).build(), "i2") @@ -645,8 +645,8 @@ public void testMultipleOutputsMergeCnn() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE) - .learningRate(1.0).activation(Activation.TANH).graphBuilder().addInputs("input") + .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) + .updater(new NoOp()).activation(Activation.TANH).graphBuilder().addInputs("input") .addLayer("l0", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0) .nIn(2).nOut(2).activation(Activation.TANH).build(), "input") .addLayer("l1", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0) @@ -694,7 +694,7 @@ public void testBasicIrisTripletStackingL2Loss() { new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).learningRate(1.0).graphBuilder() + .updater(new NoOp()).graphBuilder() .addInputs("input1", "input2", "input3") .addVertex("stack1", new StackVertex(), "input1", "input2", "input3") .addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5) @@ -767,7 +767,7 @@ public void testBasicCenterLoss() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.DISTRIBUTION).dist(new GaussianDistribution(0, 1)) - .updater(Updater.NONE).learningRate(1.0).graphBuilder().addInputs("input1") + .updater(new NoOp()).graphBuilder().addInputs("input1") .addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH) .build(), "input1") .addLayer("cl", new CenterLossOutputLayer.Builder() @@ -828,8 +828,8 @@ public void testCnnPoolCenterLoss() { for (boolean train : trainFirst) { for (double lambda : new double[] {0.0, 0.5, 2.0}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false) - .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(3).build()) .layer(1, new GlobalPoolingLayer.Builder().poolingType(PoolingType.AVG).build()) @@ -883,7 +883,7 @@ public void testBasicL2() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) 
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder() + .activation(Activation.TANH).updater(new NoOp()).graphBuilder() .addInputs("in1", "in2").addLayer("d0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "in1") .addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "in2") .addVertex("l2", new L2Vertex(), "d0", "d1") @@ -934,7 +934,7 @@ public void testBasicStackUnstack() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder() + .activation(Activation.TANH).updater(new NoOp()).graphBuilder() .addInputs("in1", "in2") .addLayer("d0", new DenseLayer.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in1") .addLayer("d1", new DenseLayer.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in2") @@ -988,7 +988,7 @@ public void testBasicStackUnstackDebug() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder() + .activation(Activation.TANH).updater(new NoOp()).graphBuilder() .addInputs("in1", "in2").addLayer("d0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "in1") .addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "in2") .addVertex("stack", new StackVertex(), "d0", "d1") @@ -1047,7 +1047,7 @@ public void testBasicStackUnstackVariableLengthTS() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder() + .activation(Activation.TANH).updater(new NoOp()).graphBuilder() .addInputs("in1", "in2") .addLayer("d0", new GravesLSTM.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in1") .addLayer("d1", new GravesLSTM.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in2") @@ -1109,7 +1109,7 @@ public void testBasicTwoOutputs() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder() + .activation(Activation.TANH).updater(new NoOp()).graphBuilder() .addInputs("in1", "in2").addLayer("d0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "in1") .addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "in2") .addLayer("out1", @@ -1164,7 +1164,7 @@ public void testL2NormalizeVertex2d() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder() + .activation(Activation.TANH).updater(new NoOp()).graphBuilder() .addInputs("in1").addLayer("d1", new DenseLayer.Builder().nIn(2).nOut(3).build(), "in1") 
.addVertex("norm", new L2NormalizeVertex(), "d1") .addLayer("out1", @@ -1210,7 +1210,7 @@ public void testL2NormalizeVertex4d() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0).graphBuilder() + .activation(Activation.TANH).updater(new NoOp()).graphBuilder() .addInputs("in1") .addLayer("d1", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(2).build(), "in1") @@ -1258,9 +1258,9 @@ public void testGraphEmbeddingLayerSimple() { labels.putScalar(new int[] {i, r.nextInt(3)}, 1.0); } - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.1) + ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.1) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345L) - .updater(Updater.NONE).graphBuilder().addInputs("in") + .updater(new NoOp()).graphBuilder().addInputs("in") .addLayer("0", new EmbeddingLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build(), "in") .addLayer("1", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsMasking.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsMasking.java index 89663b404f2b..8bee4c8f96c3 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsMasking.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsMasking.java @@ -3,7 +3,6 @@ import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -15,6 +14,7 @@ import org.nd4j.linalg.api.buffer.util.DataTypeUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.impl.*; @@ -105,14 +105,14 @@ public void gradientCheckMaskingOutputSimple() { maskArr.putScalar(new int[] {0, j}, mask[i][j] ? 
1.0 : 0.0); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).seed(12345L) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L) .list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .layer(1, new RnnOutputLayer.Builder(s.lf).activation(s.act).nIn(layerSize).nOut(s.nOut) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .pretrain(false).backprop(true).build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); @@ -150,8 +150,8 @@ public void testBidirectionalLSTMMasking() { int testNum = 0; for (INDArray mask : masks) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false) - .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() .layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize) .activation(Activation.TANH).build()) @@ -246,7 +246,7 @@ public void testPerOutputMaskingMLP() { Activation a = act[i]; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345) .list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) @@ -340,7 +340,7 @@ public void testPerOutputMaskingRnn() { Activation a = act[i]; Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345) .list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) @@ -372,7 +372,7 @@ public void testPerOutputMaskingRnn() { //Check the equivalent compgraph: Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration cg = new NeuralNetConfiguration.Builder().updater(Updater.NONE) + ComputationGraphConfiguration cg = new NeuralNetConfiguration.Builder().updater(new NoOp()) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 2)).seed(12345) .graphBuilder().addInputs("in") .addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java index c96f4f3b1065..855c37e3a428 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java @@ -2,7 +2,6 @@ import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -16,6 +15,7 @@ import 
org.nd4j.linalg.api.buffer.util.DataTypeUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.Random; @@ -52,8 +52,8 @@ public void testGradientLRNSimple() { labels.putScalar(i, r.nextInt(nOut), 1.0); } - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0) - .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp()) + .seed(12345L).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 2)).list() .layer(0, new ConvolutionLayer.Builder().nOut(6).kernelSize(2, 2).stride(1, 1) .activation(Activation.TANH).build()) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java index 701a674e7ae0..faa2d67ef961 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java @@ -16,6 +16,7 @@ import org.nd4j.linalg.api.buffer.util.DataTypeUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; import java.util.Random; @@ -57,27 +58,27 @@ public void testLSTMBasicMultiLayer() { if (graves) { l0 = new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.SIGMOID) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0)) - .updater(Updater.NONE).build(); + .updater(new NoOp()).build(); l1 = new GravesLSTM.Builder().nIn(layerSize).nOut(layerSize).activation(Activation.SIGMOID) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0)) - .updater(Updater.NONE).build(); + .updater(new NoOp()).build(); } else { l0 = new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.SIGMOID) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0)) - .updater(Updater.NONE).build(); + .updater(new NoOp()).build(); l1 = new LSTM.Builder().nIn(layerSize).nOut(layerSize).activation(Activation.SIGMOID) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0)) - .updater(Updater.NONE).build(); + .updater(new NoOp()).build(); } MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().regularization(false).seed(12345L).list() + new NeuralNetConfiguration.Builder().seed(12345L).list() .layer(0, l0).layer(1, l1) .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .weightInit(WeightInit.DISTRIBUTION) - .dist(new NormalDistribution(0, 1.0)).updater(Updater.NONE) + .dist(new NormalDistribution(0, 1.0)).updater(new NoOp()) .build()) .pretrain(false).backprop(true).build(); @@ -166,9 +167,9 @@ public void testGradientLSTMFull() { double l1 = l1vals[k]; NeuralNetConfiguration.Builder conf = - new NeuralNetConfiguration.Builder().regularization(l1 > 0.0 || l2 > 0.0) + new NeuralNetConfiguration.Builder() .seed(12345L).weightInit(WeightInit.DISTRIBUTION) - .dist(new NormalDistribution(0, 1)).updater(Updater.NONE); + .dist(new NormalDistribution(0, 1)).updater(new NoOp()); if (l1 > 0.0) conf.l1(l1); @@ -255,9 
+256,9 @@ public void testGradientLSTMEdgeCases() { layer = new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build(); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).regularization(false) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).list().layer(0, layer) + .updater(new NoOp()).list().layer(0, layer) .layer(1, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) .nIn(layerSize).nOut(nOut).build()) .pretrain(false).backprop(true).build(); @@ -321,7 +322,7 @@ public void testGradientGravesBidirectionalLSTMFull() { double l1 = l1vals[k]; NeuralNetConfiguration.Builder conf = - new NeuralNetConfiguration.Builder().regularization(l1 > 0.0 || l2 > 0.0); + new NeuralNetConfiguration.Builder(); if (l1 > 0.0) conf.l1(l1); if (l2 > 0.0) @@ -341,7 +342,7 @@ public void testGradientGravesBidirectionalLSTMFull() { .build()) .layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation).nIn(layerSize) .nOut(nOut).weightInit(WeightInit.DISTRIBUTION) - .dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build()) + .dist(new NormalDistribution(0, 1)).updater(new NoOp()).build()) .pretrain(false).backprop(true).build(); @@ -398,7 +399,7 @@ public void testGradientGravesBidirectionalLSTMEdgeCases() { } } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).seed(12345L) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L) .list() .layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize) .weightInit(WeightInit.DISTRIBUTION) @@ -407,7 +408,7 @@ public void testGradientGravesBidirectionalLSTMEdgeCases() { .build()) .layer(1, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) .nIn(layerSize).nOut(nOut).weightInit(WeightInit.DISTRIBUTION) - .dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build()) + .dist(new NormalDistribution(0, 1)).updater(new NoOp()).build()) .pretrain(false).backprop(true).build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); @@ -445,7 +446,7 @@ public void testGradientCnnFfRnn() { } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE).seed(12345) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()).seed(12345) .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2)).list() .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(3).nOut(5).stride(1, 1) .activation(Activation.TANH).build()) //Out: (10-5)/1+1 = 6 -> 6x6x5 diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java index a61249e2f860..c30013991e9e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java @@ -4,7 +4,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.conf.layers.DenseLayer; import 
org.deeplearning4j.nn.conf.layers.LossLayer; @@ -21,6 +20,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.BooleanIndexing; import org.nd4j.linalg.indexing.conditions.Conditions; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.impl.*; import org.nd4j.shade.jackson.databind.ObjectMapper; @@ -62,36 +62,36 @@ public void lossFunctionGradientCheck() { LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(), LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(),}; - String[] outputActivationFn = new String[] {"sigmoid", //xent - "sigmoid", //xent - "tanh", //cosine - "tanh", //hinge -> trying to predict 1 or -1 - "sigmoid", //kld -> probab so should be between 0 and 1 - "softmax", //kld + softmax - "tanh", //l1 - "rationaltanh", //l1 - "softmax", //l1 + softmax - "tanh", //l2 - "softmax", //l2 + softmax - "identity", //mae - "softmax", //mae + softmax - "identity", //mape - "softmax", //mape + softmax - "softmax", //mcxent - "identity", //mse - "softmax", //mse + softmax - "sigmoid", //msle - requires positive labels/activations due to log - "softmax", //msle + softmax - "sigmoid", //nll - "softmax", //nll + softmax - "sigmoid", //poisson - requires positive predictions due to log... not sure if this is the best option - "tanh", //squared hinge - "sigmoid", //f-measure (binary, single sigmoid output) - "sigmoid", //f-measure (binary, single sigmoid output) - "softmax", //f-measure (binary, 2-label softmax output) - "softmax", //f-measure (binary, 2-label softmax output) - "identity", // MixtureDensity - "tanh", // MixtureDensity + tanh + Activation[] outputActivationFn = new Activation[] {Activation.SIGMOID, //xent + Activation.SIGMOID, //xent + Activation.TANH, //cosine + Activation.TANH, //hinge -> trying to predict 1 or -1 + Activation.SIGMOID, //kld -> probab so should be between 0 and 1 + Activation.SOFTMAX, //kld + softmax + Activation.TANH, //l1 + Activation.RATIONALTANH, //l1 + Activation.SOFTMAX, //l1 + softmax + Activation.TANH, //l2 + Activation.SOFTMAX, //l2 + softmax + Activation.IDENTITY, //mae + Activation.SOFTMAX, //mae + softmax + Activation.IDENTITY, //mape + Activation.SOFTMAX, //mape + softmax + Activation.SOFTMAX, //mcxent + Activation.IDENTITY, //mse + Activation.SOFTMAX, //mse + softmax + Activation.SIGMOID, //msle - requires positive labels/activations due to log + Activation.SOFTMAX, //msle + softmax + Activation.SIGMOID, //nll + Activation.SOFTMAX, //nll + softmax + Activation.SIGMOID, //poisson - requires positive predictions due to log... 
not sure if this is the best option + Activation.TANH, //squared hinge + Activation.SIGMOID, //f-measure (binary, single sigmoid output) + Activation.SIGMOID, //f-measure (binary, single sigmoid output) + Activation.SOFTMAX, //f-measure (binary, 2-label softmax output) + Activation.SOFTMAX, //f-measure (binary, 2-label softmax output) + Activation.IDENTITY, // MixtureDensity + Activation.TANH, // MixtureDensity + tanh }; int[] nOut = new int[] {1, //xent @@ -140,7 +140,7 @@ public void lossFunctionGradientCheck() { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345) - .updater(Updater.NONE).regularization(false).weightInit(WeightInit.DISTRIBUTION) + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-2, 2)).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build()) .layer(1, new OutputLayer.Builder().lossFunction(lossFunctions[i]) @@ -205,35 +205,35 @@ public void lossFunctionGradientCheckLossLayer() { new LossFMeasure(2.0), LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(), LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(),}; - String[] outputActivationFn = new String[] {"sigmoid", //xent - "sigmoid", //xent - "tanh", //cosine - "tanh", //hinge -> trying to predict 1 or -1 - "sigmoid", //kld -> probab so should be between 0 and 1 - "softmax", //kld + softmax - "tanh", //l1 - "softmax", //l1 + softmax - "tanh", //l2 - "softmax", //l2 + softmax - "identity", //mae - "softmax", //mae + softmax - "identity", //mape - "softmax", //mape + softmax - "softmax", //mcxent - "identity", //mse - "softmax", //mse + softmax - "sigmoid", //msle - requires positive labels/activations due to log - "softmax", //msle + softmax - "sigmoid", //nll - "softmax", //nll + softmax - "sigmoid", //poisson - requires positive predictions due to log... not sure if this is the best option - "tanh", //squared hinge - "sigmoid", //f-measure (binary, single sigmoid output) - "sigmoid", //f-measure (binary, single sigmoid output) - "softmax", //f-measure (binary, 2-label softmax output) - "softmax", //f-measure (binary, 2-label softmax output) - "identity", // MixtureDensity - "tanh", // MixtureDensity + tanh + Activation[] outputActivationFn = new Activation[] {Activation.SIGMOID, //xent + Activation.SIGMOID, //xent + Activation.TANH, //cosine + Activation.TANH, //hinge -> trying to predict 1 or -1 + Activation.SIGMOID, //kld -> probab so should be between 0 and 1 + Activation.SOFTMAX, //kld + softmax + Activation.TANH, //l1 + Activation.SOFTMAX, //l1 + softmax + Activation.TANH, //l2 + Activation.SOFTMAX, //l2 + softmax + Activation.IDENTITY, //mae + Activation.SOFTMAX, //mae + softmax + Activation.IDENTITY, //mape + Activation.SOFTMAX, //mape + softmax + Activation.SOFTMAX, //mcxent + Activation.IDENTITY, //mse + Activation.SOFTMAX, //mse + softmax + Activation.SIGMOID, //msle - requires positive labels/activations due to log + Activation.SOFTMAX, //msle + softmax + Activation.SIGMOID, //nll + Activation.SOFTMAX, //nll + softmax + Activation.SIGMOID, //poisson - requires positive predictions due to log... 
not sure if this is the best option + Activation.TANH, //squared hinge + Activation.SIGMOID, //f-measure (binary, single sigmoid output) + Activation.SIGMOID, //f-measure (binary, single sigmoid output) + Activation.SOFTMAX, //f-measure (binary, 2-label softmax output) + Activation.SOFTMAX, //f-measure (binary, 2-label softmax output) + Activation.IDENTITY, // MixtureDensity + Activation.TANH, // MixtureDensity + tanh }; int[] nOut = new int[] {1, //xent @@ -294,7 +294,7 @@ public void lossFunctionGradientCheckLossLayer() { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345) - .updater(Updater.NONE).regularization(false).weightInit(WeightInit.DISTRIBUTION) + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-2, 2)).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH) .build()) @@ -485,22 +485,22 @@ public void lossFunctionWeightedGradientCheck() { new LossMAPE(w), new LossMCXENT(w), new LossMSE(w), new LossMSE(w), new LossMSLE(w), new LossMSLE(w), new LossNegativeLogLikelihood(w), new LossNegativeLogLikelihood(w),}; - String[] outputActivationFn = new String[] {"sigmoid", //xent - "tanh", //l1 - "softmax", //l1 + softmax - "tanh", //l2 - "softmax", //l2 + softmax - "identity", //mae - "softmax", //mae + softmax - "identity", //mape - "softmax", //mape + softmax - "softmax", //mcxent - "identity", //mse - "softmax", //mse + softmax - "sigmoid", //msle - requires positive labels/activations due to log - "softmax", //msle + softmax - "sigmoid", //nll - "softmax", //nll + softmax + Activation[] outputActivationFn = new Activation[] {Activation.SIGMOID, //xent + Activation.TANH, //l1 + Activation.SOFTMAX, //l1 + softmax + Activation.TANH, //l2 + Activation.SOFTMAX, //l2 + softmax + Activation.IDENTITY, //mae + Activation.SOFTMAX, //mae + softmax + Activation.IDENTITY, //mape + Activation.SOFTMAX, //mape + softmax + Activation.SOFTMAX, //mcxent + Activation.IDENTITY, //mse + Activation.SOFTMAX, //mse + softmax + Activation.SIGMOID, //msle - requires positive labels/activations due to log + Activation.SOFTMAX, //msle + softmax + Activation.SIGMOID, //nll + Activation.SOFTMAX, //nll + softmax }; int[] minibatchSizes = new int[] {1, 3}; @@ -513,7 +513,7 @@ public void lossFunctionWeightedGradientCheck() { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345) - .updater(Updater.NONE).regularization(false).weightInit(WeightInit.DISTRIBUTION) + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-3, 3)).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH) .build()) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java index b3a91e7c5cc3..083e70fe51da 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java @@ -1,27 +1,23 @@ package org.deeplearning4j.gradientcheck; +import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import 
org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.util.ModelSerializer; import org.junit.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataBuffer; import org.nd4j.linalg.api.buffer.util.DataTypeUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; - import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -38,21 +34,6 @@ public class NoBiasGradientCheckTests { DataTypeUtil.setDTypeForContext(DataBuffer.Type.DOUBLE); } - private static void checkSerialization(MultiLayerNetwork net){ - try { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ModelSerializer.writeModel(net, baos, true); - byte[] bytes = baos.toByteArray(); - ByteArrayInputStream bais = new ByteArrayInputStream(bytes); - MultiLayerNetwork net2 = ModelSerializer.restoreMultiLayerNetwork(bais, true); - assertEquals(net.getLayerWiseConfigurations().toJson(), net2.getLayerWiseConfigurations().toJson()); - assertEquals(net.params(), net2.params()); - } catch (IOException e ){ - throw new RuntimeException(e); - } - - } - @Test public void testGradientNoBiasDenseOutput() { @@ -70,8 +51,8 @@ public void testGradientNoBiasDenseOutput() { for (boolean denseHasBias : new boolean[]{true, false}) { for (boolean outHasBias : new boolean[]{true, false}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false) - .updater(Updater.NONE) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()) .seed(12345L) .list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize) @@ -120,7 +101,7 @@ public void testGradientNoBiasDenseOutput() { DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(msg, gradOK); - checkSerialization(mln); + TestUtils.testModelSerialization(mln); } } } @@ -142,8 +123,8 @@ public void testGradientNoBiasRnnOutput() { for (boolean rnnOutHasBias : new boolean[]{true, false}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false) - .updater(Updater.NONE) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()) .seed(12345L) .list() .layer(0, new LSTM.Builder().nIn(nIn).nOut(layerSize) @@ -178,7 +159,7 @@ public void testGradientNoBiasRnnOutput() { DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(msg, gradOK); - checkSerialization(mln); + TestUtils.testModelSerialization(mln); } } } @@ -202,8 +183,8 @@ public void testGradientNoBiasEmbedding() { for (boolean embeddingHasBias : new boolean[]{true, false}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false) - .updater(Updater.NONE) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()) .seed(12345L) .list() .layer(0, new 
EmbeddingLayer.Builder().nIn(nIn).nOut(layerSize) @@ -239,7 +220,7 @@ public void testGradientNoBiasEmbedding() { DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(msg, gradOK); - checkSerialization(mln); + TestUtils.testModelSerialization(mln); } } } @@ -268,8 +249,8 @@ public void testCnnWithSubsamplingNoBias() { for(boolean cnnHasBias : new boolean[]{true, false}) { MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(1.0) - .updater(Updater.SGD).weightInit(WeightInit.DISTRIBUTION) + new NeuralNetConfiguration.Builder().updater(new NoOp()) + .weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) .list() .layer(new ConvolutionLayer.Builder(kernel, @@ -305,7 +286,7 @@ public void testCnnWithSubsamplingNoBias() { assertTrue(msg, gradOK); - checkSerialization(net); + TestUtils.testModelSerialization(net); } } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java index 5873a9ba506e..be8f735f2a51 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java @@ -1,9 +1,7 @@ package org.deeplearning4j.gradientcheck; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.conf.layers.variational.*; @@ -19,6 +17,7 @@ import org.nd4j.linalg.api.ops.random.impl.BernoulliDistribution; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; import org.nd4j.linalg.lossfunctions.impl.LossMAE; import org.nd4j.linalg.lossfunctions.impl.LossMSE; @@ -48,10 +47,10 @@ public void testVaeAsMLP() { //Post pre-training: a VAE can be used as a MLP, by taking the mean value from p(z|x) as the output //This gradient check tests this part - String[] activFns = {"identity", "tanh"}; //activation functions such as relu and hardtanh: may randomly fail due to discontinuities + Activation[] activFns = {Activation.IDENTITY, Activation.TANH}; //activation functions such as relu and hardtanh: may randomly fail due to discontinuities LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE}; - String[] outputActivations = {"softmax", "tanh"}; //i.e., lossFunctions[i] used with outputActivations[i] here + Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here //use l2vals[i] with l1vals[i] double[] l2vals = {0.4, 0.0, 0.4, 0.4}; @@ -74,33 +73,31 @@ public void testVaeAsMLP() { int[] encoderSizes = encoderLayerSizes[ls]; int[] decoderSizes = decoderLayerSizes[ls]; - for (String afn : activFns) { + for (Activation afn : activFns) { for (int i = 0; i < lossFunctions.length; i++) { for (int k = 0; k < l2vals.length; k++) { LossFunction lf = lossFunctions[i]; - String outputActivation = outputActivations[i]; + Activation outputActivation = outputActivations[i]; double l2 = l2vals[k]; double l1 = l1vals[k]; 
MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().regularization(true).l2(l2).l1(l1) + new NeuralNetConfiguration.Builder().l2(l2).l1(l1) + .updater(new NoOp()) .l2Bias(biasL2[k]).l1Bias(biasL1[k]) - .optimizationAlgo( - OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .learningRate(1.0).seed(12345L).list() + .seed(12345L).list() .layer(0, new VariationalAutoencoder.Builder().nIn(4) .nOut(3).encoderLayerSizes(encoderSizes) .decoderLayerSizes(decoderSizes) .weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) - .activation(afn).updater( - Updater.SGD) + .activation(afn) .build()) .layer(1, new OutputLayer.Builder(lf) .activation(outputActivation).nIn(3).nOut(3) .weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) - .updater(Updater.SGD).build()) + .build()) .pretrain(false).backprop(true).build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); @@ -131,9 +128,9 @@ public void testVaeAsMLP() { @Test public void testVaePretrain() { - String[] activFns = {"identity", "identity", "tanh", "tanh"}; //activation functions such as relu and hardtanh: may randomly fail due to discontinuities - String[] pzxAfns = {"identity", "tanh", "identity", "tanh"}; - String[] pxzAfns = {"tanh", "identity", "tanh", "identity"}; + Activation[] activFns = {Activation.IDENTITY, Activation.IDENTITY, Activation.TANH, Activation.TANH}; //activation functions such as relu and hardtanh: may randomly fail due to discontinuities + Activation[] pzxAfns = {Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH}; + Activation[] pxzAfns = {Activation.TANH, Activation.IDENTITY, Activation.TANH, Activation.IDENTITY}; //use l2vals[i] with l1vals[i] double[] l2vals = {0.4, 0.0, 0.4, 0.4}; @@ -153,22 +150,21 @@ public void testVaePretrain() { int[] decoderSizes = decoderLayerSizes[ls]; for (int j = 0; j < activFns.length; j++) { - String afn = activFns[j]; - String pzxAfn = pzxAfns[j]; - String pxzAfn = pxzAfns[j]; + Activation afn = activFns[j]; + Activation pzxAfn = pzxAfns[j]; + Activation pxzAfn = pxzAfns[j]; double l2 = l2vals[j]; //Ideally we'd do the cartesian product of l1/l2 and the activation functions, but that takes too long... 
double l1 = l1vals[j]; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(l2) - .l1(l1).l2Bias(biasL2[j]).l1Bias(biasL1[j]) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .learningRate(1.0).seed(12345L).weightInit(WeightInit.XAVIER).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(l2) + .l1(l1).l2Bias(biasL2[j]).l1Bias(biasL1[j]).updater(new NoOp()) + .seed(12345L).weightInit(WeightInit.XAVIER).list() .layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3) .encoderLayerSizes(encoderSizes).decoderLayerSizes(decoderSizes) .pzxActivationFunction(pzxAfn) .reconstructionDistribution( new GaussianReconstructionDistribution(pxzAfn)) - .activation(afn).updater(Updater.SGD).build()) + .activation(afn).build()) .pretrain(true).backprop(false).build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); @@ -255,8 +251,8 @@ public void testVaePretrainReconstructionDistributions() { throw new RuntimeException(); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.3) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3) + .updater(new NoOp()) .seed(12345L).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) .list().layer(0, new VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3) @@ -264,7 +260,7 @@ public void testVaePretrainReconstructionDistributions() { .pzxActivationFunction(Activation.TANH) .reconstructionDistribution( reconstructionDistributions[i]) - .activation(Activation.TANH).updater(Updater.SGD) + .activation(Activation.TANH) .build()) .pretrain(true).backprop(false).build(); @@ -299,14 +295,14 @@ public void testVaePretrainMultipleSamples() { // for (int numSamples : new int[]{10}) { INDArray features = Nd4j.rand(minibatch, 4); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.3) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3) + .updater(new NoOp()) .seed(12345L).weightInit(WeightInit.XAVIER).list() .layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(5, 6) .decoderLayerSizes(7, 8).pzxActivationFunction(Activation.TANH) .reconstructionDistribution( new GaussianReconstructionDistribution(Activation.TANH)) - .numSamples(numSamples).activation(Activation.TANH).updater(Updater.SGD) + .numSamples(numSamples).activation(Activation.TANH) .build()) .pretrain(true).backprop(false).build(); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java index d250e95b7b59..4638e8a7def5 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java @@ -3,8 +3,7 @@ import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; -import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import 
org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.Test; @@ -13,8 +12,9 @@ import org.nd4j.linalg.api.buffer.util.DataTypeUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; -import static org.junit.Assert.*; +import static org.junit.Assert.assertTrue; /** * @author Alex Black @@ -64,7 +64,7 @@ public void testYoloOutputLayer() { INDArray labels = yoloLabels(mb, c, h, w); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) - .learningRate(1.0).updater(Updater.SGD) + .updater(new NoOp()) .activation(a) .l1(l1[i]).l2(l2[i]) .convolutionMode(ConvolutionMode.Same) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java index 04d64aac5543..51c7dbae0192 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java @@ -7,10 +7,16 @@ import lombok.NoArgsConstructor; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; -import org.deeplearning4j.nn.conf.graph.*; +import org.deeplearning4j.nn.conf.graph.ElementWiseVertex; +import org.deeplearning4j.nn.conf.graph.GraphVertex; +import org.deeplearning4j.nn.conf.graph.MergeVertex; +import org.deeplearning4j.nn.conf.graph.SubsetVertex; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InvalidInputTypeException; -import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.conf.misc.TestGraphVertex; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; @@ -20,6 +26,7 @@ import org.junit.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import static org.junit.Assert.assertEquals; @@ -31,8 +38,8 @@ public class ComputationGraphConfigurationTest { public void testJSONBasic() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE) - .learningRate(1.0).graphBuilder().addInputs("input") + .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(new NoOp()) + .graphBuilder().addInputs("input") .addLayer("firstLayer", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build(), "input") @@ -231,43 +238,6 @@ public void testOutputOrderDoesntChangeWhenCloning() { assertEquals(json, jsonCloned); } - @Test - public void testBiasLr() { - //setup the network - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).learningRate(1e-2) - .biasLearningRate(0.5).graphBuilder().addInputs("in") - .addLayer("0", new ConvolutionLayer.Builder(5, 
5).nOut(5).dropOut(0.5) - .weightInit(WeightInit.XAVIER).activation(Activation.RELU).build(), "in") - .addLayer("1", new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build(), "0") - .addLayer("2", new DenseLayer.Builder().nOut(100).activation(Activation.RELU) - .biasLearningRate(0.25).build(), "1") - .addLayer("3", new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nOut(10).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build(), - "2") - .setOutputs("3").setInputTypes(InputType.convolutional(28, 28, 1)).build(); - - org.deeplearning4j.nn.conf.layers.BaseLayer l0 = - (BaseLayer) ((LayerVertex) conf.getVertices().get("0")).getLayerConf().getLayer(); - org.deeplearning4j.nn.conf.layers.BaseLayer l1 = - (BaseLayer) ((LayerVertex) conf.getVertices().get("1")).getLayerConf().getLayer(); - org.deeplearning4j.nn.conf.layers.BaseLayer l2 = - (BaseLayer) ((LayerVertex) conf.getVertices().get("2")).getLayerConf().getLayer(); - org.deeplearning4j.nn.conf.layers.BaseLayer l3 = - (BaseLayer) ((LayerVertex) conf.getVertices().get("3")).getLayerConf().getLayer(); - - assertEquals(0.5, l0.getBiasLearningRate(), 1e-6); - assertEquals(1e-2, l0.getLearningRate(), 1e-6); - - assertEquals(0.5, l1.getBiasLearningRate(), 1e-6); - assertEquals(1e-2, l1.getLearningRate(), 1e-6); - - assertEquals(0.25, l2.getBiasLearningRate(), 1e-6); - assertEquals(1e-2, l2.getLearningRate(), 1e-6); - - assertEquals(0.5, l3.getBiasLearningRate(), 1e-6); - assertEquals(1e-2, l3.getLearningRate(), 1e-6); - } - @AllArgsConstructor @NoArgsConstructor @Data diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java index b396a58ebd69..b2c92af134af 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java @@ -24,31 +24,31 @@ public void testJsonLossFunctions() { new LossNegativeLogLikelihood(), new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge(), new LossFMeasure(), new LossFMeasure(2.0)}; - String[] outputActivationFn = new String[] {"sigmoid", //xent - "sigmoid", //xent - "tanh", //cosine - "tanh", //hinge -> trying to predict 1 or -1 - "sigmoid", //kld -> probab so should be between 0 and 1 - "softmax", //kld + softmax - "tanh", //l1 - "softmax", //l1 + softmax - "tanh", //l2 - "softmax", //l2 + softmax - "identity", //mae - "softmax", //mae + softmax - "identity", //mape - "softmax", //mape + softmax - "softmax", //mcxent - "identity", //mse - "softmax", //mse + softmax - "sigmoid", //msle - requires positive labels/activations due to log - "softmax", //msle + softmax - "sigmoid", //nll - "softmax", //nll + softmax - "sigmoid", //poisson - requires positive predictions due to log... 
not sure if this is the best option - "tanh", //squared hinge - "sigmoid", //f-measure (binary, single sigmoid output) - "softmax" //f-measure (binary, 2-label softmax output) + Activation[] outputActivationFn = new Activation[] {Activation.SIGMOID, //xent + Activation.SIGMOID, //xent + Activation.TANH, //cosine + Activation.TANH, //hinge -> trying to predict 1 or -1 + Activation.SIGMOID, //kld -> probab so should be between 0 and 1 + Activation.SOFTMAX, //kld + softmax + Activation.TANH, //l1 + Activation.SOFTMAX, //l1 + softmax + Activation.TANH, //l2 + Activation.SOFTMAX, //l2 + softmax + Activation.IDENTITY, //mae + Activation.SOFTMAX, //mae + softmax + Activation.IDENTITY, //mape + Activation.SOFTMAX, //mape + softmax + Activation.SOFTMAX, //mcxent + Activation.IDENTITY, //mse + Activation.SOFTMAX, //mse + softmax + Activation.SIGMOID, //msle - requires positive labels/activations due to log + Activation.SOFTMAX, //msle + softmax + Activation.SIGMOID, //nll + Activation.SOFTMAX, //nll + softmax + Activation.SIGMOID, //poisson - requires positive predictions due to log... not sure if this is the best option + Activation.TANH, //squared hinge + Activation.SIGMOID, //f-measure (binary, single sigmoid output) + Activation.SOFTMAX //f-measure (binary, 2-label softmax output) }; int[] nOut = new int[] {1, //xent diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java index fec5f2616551..b5c938f08a29 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java @@ -24,6 +24,7 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; +import org.deeplearning4j.nn.conf.weightnoise.DropConnect; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.api.IterationListener; @@ -31,6 +32,8 @@ import org.junit.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Adam; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.io.*; @@ -87,7 +90,7 @@ public void testConvnetJson() { //setup the network MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) - .regularization(true).l1(1e-1).l2(2e-4).useDropConnect(true).dropOut(0.5).miniBatch(true) + .l1(1e-1).l2(2e-4).weightNoise(new DropConnect(0.5)).miniBatch(true) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() .layer(0, new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) @@ -121,7 +124,7 @@ public void testUpsamplingConvnetJson() { //setup the network MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) - .regularization(true).l1(1e-1).l2(2e-4).useDropConnect(true).dropOut(0.5).miniBatch(true) + .l1(1e-1).l2(2e-4).dropOut(0.5).miniBatch(true) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() .layer(new ConvolutionLayer.Builder(5, 
5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) @@ -144,7 +147,7 @@ public void testUpsamplingConvnetJson() { @Test public void testGlobalPoolingJson() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).updater(Updater.NONE) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0)).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(5).build()) .layer(1, new GlobalPoolingLayer.Builder().poolingType(PoolingType.PNORM).pnorm(3).build()) @@ -363,13 +366,12 @@ public void testPreBackFineValidation() { @Test public void testBiasLr() { //setup the network - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).learningRate(1e-2) - .biasLearningRate(0.5).updater(Updater.ADAM).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new Adam(1e-2)) + .biasUpdater(new Adam(0.5)).list() .layer(0, new ConvolutionLayer.Builder(5, 5).nOut(5).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) .layer(1, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build()) - .layer(2, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).biasLearningRate(0.25) - .build()) + .layer(2, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build()) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) .setInputType(InputType.convolutional(28, 28, 1)).build(); @@ -379,17 +381,17 @@ public void testBiasLr() { org.deeplearning4j.nn.conf.layers.BaseLayer l2 = (BaseLayer) conf.getConf(2).getLayer(); org.deeplearning4j.nn.conf.layers.BaseLayer l3 = (BaseLayer) conf.getConf(3).getLayer(); - assertEquals(0.5, l0.getBiasLearningRate(), 1e-6); - assertEquals(1e-2, l0.getLearningRate(), 1e-6); + assertEquals(0.5, ((Adam)l0.getUpdaterByParam("b")).getLearningRate(), 1e-6); + assertEquals(1e-2, ((Adam)l0.getUpdaterByParam("W")).getLearningRate(), 1e-6); - assertEquals(0.5, l1.getBiasLearningRate(), 1e-6); - assertEquals(1e-2, l1.getLearningRate(), 1e-6); + assertEquals(0.5, ((Adam)l1.getUpdaterByParam("b")).getLearningRate(), 1e-6); + assertEquals(1e-2, ((Adam)l1.getUpdaterByParam("W")).getLearningRate(), 1e-6); - assertEquals(0.25, l2.getBiasLearningRate(), 1e-6); - assertEquals(1e-2, l2.getLearningRate(), 1e-6); + assertEquals(0.5, ((Adam)l2.getUpdaterByParam("b")).getLearningRate(), 1e-6); + assertEquals(1e-2, ((Adam)l2.getUpdaterByParam("W")).getLearningRate(), 1e-6); - assertEquals(0.5, l3.getBiasLearningRate(), 1e-6); - assertEquals(1e-2, l3.getLearningRate(), 1e-6); + assertEquals(0.5, ((Adam)l3.getUpdaterByParam("b")).getLearningRate(), 1e-6); + assertEquals(1e-2, ((Adam)l3.getUpdaterByParam("W")).getLearningRate(), 1e-6); } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiNeuralNetConfLayerBuilderTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiNeuralNetConfLayerBuilderTest.java index 479988450f91..1c5bebb96df0 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiNeuralNetConfLayerBuilderTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/MultiNeuralNetConfLayerBuilderTest.java @@ -29,7 +29,7 @@ public class MultiNeuralNetConfLayerBuilderTest { int numIn = 10; 
int numOut = 5; double drop = 0.3; - String act = "softmax"; + Activation act = Activation.SOFTMAX; PoolingType poolType = PoolingType.MAX; int[] filterSize = new int[] {2, 2}; int filterDepth = 6; @@ -78,7 +78,7 @@ public void testRbmSetup() throws Exception { MultiLayerConfiguration multiLayerConfiguration = new NeuralNetConfiguration.Builder().optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(123).iterations(5).maxNumLineSearchIterations(10) // Magical Optimisation Stuff - .regularization(true) + .list().layer(0, new RBM.Builder(RBM.HiddenUnit.RECTIFIED, RBM.VisibleUnit.GAUSSIAN).nIn(784).nOut(1000) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java index 013899a3bd72..70768512c17c 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java @@ -35,10 +35,9 @@ import org.nd4j.linalg.api.ops.impl.transforms.LeakyReLU; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.util.HashMap; - import static org.junit.Assert.*; /** @@ -92,14 +91,12 @@ public void testYaml() { public void testClone() { NeuralNetConfiguration conf = getRBMConfig(1, 1, WeightInit.UNIFORM, true); BaseLayer bl = (BaseLayer) conf.getLayer(); - bl.setMomentumSchedule(new HashMap()); conf.setStepFunction(new DefaultStepFunction()); NeuralNetConfiguration conf2 = conf.clone(); assertEquals(conf, conf2); assertNotSame(conf, conf2); - assertNotSame(bl.getMomentumSchedule(), ((BaseLayer) conf2.getLayer()).getMomentumSchedule()); assertNotSame(conf.getLayer(), conf2.getLayer()); assertNotSame(bl.getDist(), ((BaseLayer) conf2.getLayer()).getDist()); assertNotSame(conf.getStepFunction(), conf2.getStepFunction()); @@ -207,7 +204,7 @@ private static NeuralNetConfiguration getRBMConfig(int nIn, int nOut, WeightInit .activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build(); NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().iterations(3) - .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).regularization(false).layer(layer) + .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).layer(layer) .build(); conf.setPretrain(pretrain); return conf; @@ -232,14 +229,11 @@ public void testLearningRateByParam() { int iteration = 3; INDArray gradientW = Nd4j.ones(nIns[0], nOuts[0]); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.3)).list() .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).learningRate(lr) - .biasLearningRate(biasLr).build()) - .layer(1, new BatchNormalization.Builder().nIn(nIns[1]).nOut(nOuts[1]).learningRate(0.7) - .build()) - .layer(2, new OutputLayer.Builder().nIn(nIns[2]).nOut(nOuts[2]) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) + .updater(new Sgd(lr)).biasUpdater(new Sgd(biasLr)).build()) + .layer(1, new BatchNormalization.Builder().nIn(nIns[1]).nOut(nOuts[1]).updater(new Sgd(0.7)).build()) + .layer(2, new OutputLayer.Builder().nIn(nIns[2]).nOut(nOuts[2]).build()) .backprop(true).pretrain(false).build(); 
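// Editor's aside (sketch only, not part of the original patch): the configuration above shows the new pattern
// in full. The old global .learningRate(0.3) becomes .updater(new Sgd(0.3)), and the per-layer
// learningRate/biasLearningRate setters become per-layer updater/biasUpdater instances. The same override in
// isolation, using only builder calls that appear elsewhere in this patch; the local variable name is illustrative:
DenseLayer layerOverrideSketch = new DenseLayer.Builder().nIn(4).nOut(4)
        .updater(new Sgd(0.5))       // overrides the global Sgd(0.3) for this layer's weights
        .biasUpdater(new Sgd(0.1))   // and sets a separate rate for this layer's biases
        .build();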
MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -248,11 +242,11 @@ public void testLearningRateByParam() { ConvexOptimizer opt = new StochasticGradientDescent(net.getDefaultConfiguration(), new NegativeDefaultStepFunction(), null, net); opt.checkTerminalConditions(gradientW, oldScore, newScore, iteration); - assertEquals(lr, net.getLayer(0).conf().getLearningRateByParam("W"), 1e-4); - assertEquals(biasLr, net.getLayer(0).conf().getLearningRateByParam("b"), 1e-4); - assertEquals(0.7, net.getLayer(1).conf().getLearningRateByParam("gamma"), 1e-4); - assertEquals(0.3, net.getLayer(2).conf().getLearningRateByParam("W"), 1e-4); //From global LR - assertEquals(0.3, net.getLayer(2).conf().getLearningRateByParam("b"), 1e-4); //From global LR + assertEquals(lr, ((Sgd)net.getLayer(0).conf().getLayer().getUpdaterByParam("W")).getLearningRate(), 1e-4); + assertEquals(biasLr, ((Sgd)net.getLayer(0).conf().getLayer().getUpdaterByParam("b")).getLearningRate(), 1e-4); + assertEquals(0.7, ((Sgd)net.getLayer(1).conf().getLayer().getUpdaterByParam("gamma")).getLearningRate(), 1e-4); + assertEquals(0.3, ((Sgd)net.getLayer(2).conf().getLayer().getUpdaterByParam("W")).getLearningRate(), 1e-4); //From global LR + assertEquals(0.3, ((Sgd)net.getLayer(2).conf().getLayer().getUpdaterByParam("b")).getLearningRate(), 1e-4); //From global LR } @Test @@ -297,13 +291,11 @@ public void testL1L2ByParam() { int iteration = 3; INDArray gradientW = Nd4j.ones(nIns[0], nOuts[0]); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(8).regularization(true).l1(l1) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l1(l1) .l2(l2).list() - .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) + .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]).build()) .layer(1, new BatchNormalization.Builder().nIn(nIns[1]).nOut(nOuts[1]).l2(0.5).build()) - .layer(2, new OutputLayer.Builder().nIn(nIns[2]).nOut(nOuts[2]) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) + .layer(2, new OutputLayer.Builder().nIn(nIns[2]).nOut(nOuts[2]).build()) .backprop(true).pretrain(false).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -328,7 +320,7 @@ public void testLayerPretrainConfig() { org.deeplearning4j.nn.conf.layers.RBM layer = new org.deeplearning4j.nn.conf.layers.RBM.Builder(RBM.HiddenUnit.BINARY, RBM.VisibleUnit.BINARY) - .nIn(10).nOut(5).learningRate(1e-1f) + .nIn(10).nOut(5).updater(new Sgd(1e-1)) .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build(); NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1).seed(42).layer(layer).build(); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java index d5b9faf8ab5e..305674ad22e5 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java @@ -1,5 +1,6 @@ package org.deeplearning4j.nn.conf.constraints; +import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -13,15 +14,12 @@ import org.deeplearning4j.nn.conf.layers.OutputLayer; import 
org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.util.ModelSerializer; import org.junit.Test; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; - import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -40,7 +38,7 @@ public void testLayerRecurrentConstraints() throws Exception { for (LayerConstraint lc : constraints) { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .learningRate(0.0) + .updater(new Sgd(0.0)) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 5)) .list() .layer(new LSTM.Builder().nIn(12).nOut(10) @@ -75,15 +73,7 @@ public void testLayerRecurrentConstraints() throws Exception { assertEquals(RW0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6); } - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ModelSerializer.writeModel(net, baos, true); - byte[] bytes = baos.toByteArray(); - - ByteArrayInputStream bais = new ByteArrayInputStream(bytes); - MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); - - assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); - assertEquals(net.params(), restored.params()); + TestUtils.testModelSerialization(net); } } @@ -100,7 +90,7 @@ public void testLayerBiasConstraints() throws Exception { for (LayerConstraint lc : constraints) { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .learningRate(0.0) + .updater(new Sgd(0.0)) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 5)) .biasInit(10.0) .list() @@ -136,15 +126,7 @@ public void testLayerBiasConstraints() throws Exception { assertEquals(b0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6); } - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ModelSerializer.writeModel(net, baos, true); - byte[] bytes = baos.toByteArray(); - - ByteArrayInputStream bais = new ByteArrayInputStream(bytes); - MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); - - assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); - assertEquals(net.params(), restored.params()); + TestUtils.testModelSerialization(net); } } @@ -161,7 +143,7 @@ public void testLayerWeightsConstraints() throws Exception { for (LayerConstraint lc : constraints) { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .learningRate(0.0) + .updater(new Sgd(0.0)) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 5)) .list() .layer(new DenseLayer.Builder().nIn(12).nOut(10) @@ -196,15 +178,7 @@ public void testLayerWeightsConstraints() throws Exception { assertEquals(w0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6); } - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ModelSerializer.writeModel(net, baos, true); - byte[] bytes = baos.toByteArray(); - - ByteArrayInputStream bais = new ByteArrayInputStream(bytes); - MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); - - assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); - assertEquals(net.params(), restored.params()); + TestUtils.testModelSerialization(net); } } @@ -221,7 +195,7 @@ public void testLayerWeightsAndBiasConstraints() throws Exception { for 
(LayerConstraint lc : constraints) { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .learningRate(0.0) + .updater(new Sgd(0.0)) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 5)) .biasInit(0.2) .list() @@ -264,15 +238,7 @@ public void testLayerWeightsAndBiasConstraints() throws Exception { assertEquals(b0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6); } - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ModelSerializer.writeModel(net, baos, true); - byte[] bytes = baos.toByteArray(); - - ByteArrayInputStream bais = new ByteArrayInputStream(bytes); - MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); - - assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); - assertEquals(net.params(), restored.params()); + TestUtils.testModelSerialization(net); } } @@ -290,7 +256,7 @@ public void testLayerWeightsAndBiasSeparateConstraints() throws Exception { for (LayerConstraint lc : constraints) { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .learningRate(0.0) + .updater(new Sgd(0.0)) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 5)) .biasInit(0.2) .list() @@ -333,15 +299,7 @@ public void testLayerWeightsAndBiasSeparateConstraints() throws Exception { assertEquals(b0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6); } - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ModelSerializer.writeModel(net, baos, true); - byte[] bytes = baos.toByteArray(); - - ByteArrayInputStream bais = new ByteArrayInputStream(bytes); - MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); - - assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); - assertEquals(net.params(), restored.params()); + TestUtils.testModelSerialization(net); } } @@ -359,7 +317,7 @@ public void testModelConstraints() throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .constrainWeights(lc) - .learningRate(0.0) + .updater(new Sgd(0.0)) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0,5)) .biasInit(1) .list() @@ -400,15 +358,7 @@ public void testModelConstraints() throws Exception { } - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ModelSerializer.writeModel(net, baos, true); - byte[] bytes = baos.toByteArray(); - - ByteArrayInputStream bais = new ByteArrayInputStream(bytes); - MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); - - assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); - assertEquals(net.params(), restored.params()); + TestUtils.testModelSerialization(net); } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java new file mode 100644 index 000000000000..444b97d82829 --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java @@ -0,0 +1,296 @@ +package org.deeplearning4j.nn.conf.dropout; + +import lombok.Data; +import org.deeplearning4j.TestUtils; +import org.deeplearning4j.datasets.iterator.ExistingDataSetIterator; +import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.graph.LayerVertex; +import 
org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.graph.ComputationGraph; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.junit.Test; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.impl.accum.MatchCondition; +import org.nd4j.linalg.dataset.DataSet; +import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.indexing.conditions.Conditions; +import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.primitives.Triple; +import org.nd4j.linalg.schedule.MapSchedule; +import org.nd4j.linalg.schedule.ScheduleType; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestDropout { + + @Test + public void testBasicConfig(){ + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .dropOut(0.6) + .list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) + .layer(new DenseLayer.Builder().nIn(10).nOut(10).dropOut(0.7).build()) + .layer(new DenseLayer.Builder().nIn(10).nOut(10).dropOut(new AlphaDropout(0.5)).build()) + .build(); + + assertEquals(new Dropout(0.6), conf.getConf(0).getLayer().getIDropout()); + assertEquals(new Dropout(0.7), conf.getConf(1).getLayer().getIDropout()); + assertEquals(new AlphaDropout(0.5), conf.getConf(2).getLayer().getIDropout()); + + + ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + .dropOut(0.6) + .graphBuilder() + .addInputs("in") + .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") + .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).dropOut(0.7).build(), "0") + .addLayer("2", new DenseLayer.Builder().nIn(10).nOut(10).dropOut(new AlphaDropout(0.5)).build(), "1") + .setOutputs("2") + .build(); + + assertEquals(new Dropout(0.6), ((LayerVertex)conf2.getVertices().get("0")).getLayerConf().getLayer().getIDropout()); + assertEquals(new Dropout(0.7), ((LayerVertex)conf2.getVertices().get("1")).getLayerConf().getLayer().getIDropout()); + assertEquals(new AlphaDropout(0.5), ((LayerVertex)conf2.getVertices().get("2")).getLayerConf().getLayer().getIDropout()); + } + + @Test + public void testCalls(){ + + CustomDropout d1 = new CustomDropout(); + CustomDropout d2 = new CustomDropout(); + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new DenseLayer.Builder().nIn(4).nOut(3).dropOut(d1).build()) + .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MSE).dropOut(d2).nIn(3).nOut(3).build()) + .build(); + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + List l = new ArrayList<>(); + l.add(new DataSet(Nd4j.rand(5,4), Nd4j.rand(5,3))); + l.add(new DataSet(Nd4j.rand(5,4), Nd4j.rand(5,3))); + l.add(new DataSet(Nd4j.rand(5,4), Nd4j.rand(5,3))); + + DataSetIterator iter = new ExistingDataSetIterator(l); + + net.fit(iter); + net.fit(iter); + + List> expList = Arrays.asList( + new Triple<>(0, 0, false), + new Triple<>(1, 0, false), + new Triple<>(2, 0, false), + new Triple<>(3, 1, false), + new Triple<>(4, 1, false), + new Triple<>(5, 1, false)); + + assertEquals(expList, d1.getAllCalls()); + assertEquals(expList, d2.getAllCalls()); + + + d1 = new CustomDropout(); + d2 = new CustomDropout(); + ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + .graphBuilder() + 
.addInputs("in") + .addLayer("0", new DenseLayer.Builder().nIn(4).nOut(3).dropOut(d1).build(), "in") + .addLayer("1", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).dropOut(d2).nIn(3).nOut(3).build(), "0") + .setOutputs("1") + .build(); + + ComputationGraph net2 = new ComputationGraph(conf2); + net2.init(); + + net2.fit(iter); + net2.fit(iter); + + assertEquals(expList, d1.getAllCalls()); + assertEquals(expList, d2.getAllCalls()); + } + + @Data + private static class CustomDropout implements IDropout{ + + private List> allCalls = new ArrayList<>(); + + @Override + public INDArray applyDropout(INDArray inputActivations, int iteration, int epoch, boolean inPlace) { + allCalls.add(new Triple<>(iteration, epoch, inPlace)); + return inputActivations; + } + + @Override + public IDropout clone() { + throw new UnsupportedOperationException(); + } + } + + @Test + public void testSerialization(){ + + IDropout[] dropouts = new IDropout[]{ + new Dropout(0.5), + new AlphaDropout(0.5), + new GaussianDropout(0.1), + new GaussianNoise(0.1)}; + + for(IDropout id : dropouts) { + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .dropOut(id) + .list() + .layer(new DenseLayer.Builder().nIn(4).nOut(3).build()) + .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(3).nOut(3).build()) + .build(); + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + TestUtils.testModelSerialization(net); + + ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + .dropOut(id) + .graphBuilder() + .addInputs("in") + .addLayer("0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "in") + .addLayer("1", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(3).nOut(3).build(), "0") + .setOutputs("1") + .build(); + + ComputationGraph net2 = new ComputationGraph(conf2); + net2.init(); + + TestUtils.testModelSerialization(net2); + } + } + + @Test + public void testDropoutValues(){ + Nd4j.getRandom().setSeed(12345); + + Dropout d = new Dropout(0.5); + + INDArray in = Nd4j.ones(10, 10); + INDArray out = d.applyDropout(in, 0, 0, false); + + assertEquals(in, Nd4j.ones(10, 10)); + + int countZeros = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(0))).z().getInt(0); + int countTwos = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(2))).z().getInt(0); + + assertEquals(100, countZeros + countTwos); //Should only be 0 or 2 + //Stochastic, but this should hold for most cases + assertTrue(countZeros >= 25 && countZeros <= 75); + assertTrue(countTwos >= 25 && countTwos <= 75); + + //Test schedule: + d = new Dropout(new MapSchedule.Builder(ScheduleType.ITERATION).add(0, 0.5).add(5, 0.1).build()); + for( int i=0; i<10; i++ ) { + out = d.applyDropout(in, i, 0, false); + assertEquals(in, Nd4j.ones(10, 10)); + countZeros = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(0))).z().getInt(0); + + if(i < 5){ + countTwos = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(2))).z().getInt(0); + assertEquals(String.valueOf(i), 100, countZeros + countTwos); //Should only be 0 or 2 + //Stochastic, but this should hold for most cases + assertTrue(countZeros >= 25 && countZeros <= 75); + assertTrue(countTwos >= 25 && countTwos <= 75); + } else { + int countInverse = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(1.0/0.1))).z().getInt(0); + assertEquals(100, countZeros + countInverse); //Should only be 0 or 10 + //Stochastic, but this should hold for most cases + 
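// Hedged worked example (editor's addition, not part of the original patch): Dropout here is inverted dropout.
// With retain probability p each activation is either zeroed or scaled by 1/p, so the expected activation is
// unchanged. For the all-ones input above, p = 0.5 gives outputs in {0, 2}; once the MapSchedule switches to
// p = 0.1 at iteration 5, kept entries become 1/0.1 = 10 and roughly 90 of the 100 entries should be zeroed,
// which is what the counts below verify.
double keepProbSketch = 0.1;                     // illustrative locals, not used by the assertions
double keptValueSketch = 1.0 / keepProbSketch;   // == 10.0, the value matched by countInverse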
assertTrue(countZeros >= 80); + assertTrue(countInverse <= 20); + } + } + } + + @Test + public void testGaussianDropoutValues(){ + Nd4j.getRandom().setSeed(12345); + + GaussianDropout d = new GaussianDropout(0.1); //sqrt(0.1/(1-0.1)) = 0.3333 stdev + + INDArray in = Nd4j.ones(50, 50); + INDArray out = d.applyDropout(in, 0, 0, false); + + assertEquals(in, Nd4j.ones(50, 50)); + + double mean = out.meanNumber().doubleValue(); + double stdev = out.stdNumber().doubleValue(); + + assertEquals(1.0, mean, 0.05); + assertEquals(0.333, stdev, 0.02); + } + + @Test + public void testGaussianNoiseValues(){ + Nd4j.getRandom().setSeed(12345); + + GaussianNoise d = new GaussianNoise(0.1); //sqrt(0.1/(1-0.1)) = 0.3333 stdev + + INDArray in = Nd4j.ones(50, 50); + INDArray out = d.applyDropout(in, 0, 0, false); + + assertEquals(in, Nd4j.ones(50, 50)); + + double mean = out.meanNumber().doubleValue(); + double stdev = out.stdNumber().doubleValue(); + + assertEquals(1.0, mean, 0.05); + assertEquals(0.1, stdev, 0.01); + } + + @Test + public void testAlphaDropoutValues(){ + Nd4j.getRandom().setSeed(12345); + + double p = 0.4; + AlphaDropout d = new AlphaDropout(p); + + double SELU_ALPHA = 1.6732632423543772; + double SELU_LAMBDA = 1.0507009873554804; + double alphaPrime = - SELU_LAMBDA * SELU_ALPHA; + double a = 1.0 / Math.sqrt((p + alphaPrime * alphaPrime * p * (1-p))); + double b = -1.0 / Math.sqrt(p + alphaPrime * alphaPrime * p * (1-p)) * (1-p) * alphaPrime; + + double actA = d.a(p); + double actB = d.b(p); + + assertEquals(a, actA, 1e-6); + assertEquals(b, actB, 1e-6); + + INDArray in = Nd4j.ones(10, 10); + INDArray out = d.applyDropout(in, 0, 0, false); + + int countValueDropped = 0; + int countEqn = 0; + double eqn = a * 1 + b; + double valueDropped = a * alphaPrime + b; + for(int i=0; i<100; i++ ){ + double v = out.getDouble(i); + if(v >= valueDropped - 1e-6 && v <= valueDropped + 1e-6){ + countValueDropped++; + } else if(v >= eqn - 1e-6 && v <= eqn + 1e-6){ + countEqn++; + } + + } + + assertEquals(100, countValueDropped+ countEqn); + assertTrue(countValueDropped >= 25 && countValueDropped <= 75); + assertTrue(countEqn >= 25 && countEqn <= 75); + } + +} diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java index 7912e4a1622c..50c391156049 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java @@ -1,10 +1,8 @@ package org.deeplearning4j.nn.conf.graph; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.ActivationLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -19,8 +17,10 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; import java.util.Map; @@ -175,7 +175,7 @@ public void 
testElementWiseVertexFullAdd() { int midsz = 13; int outputsz = 11; ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) - .biasInit(0.0).updater(Updater.SGD) + .biasInit(0.0).updater(new Sgd()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("input1", "input2", "input3") .addLayer("dense1", @@ -240,7 +240,7 @@ public void testElementWiseVertexFullAdd() { expect.addi(Transforms.sigmoid(middle.mmul(output_W).addi(output_b.repmat(batchsz, 1)))); - INDArray output = nullsafe(cg.getOutputLayer(0).activate()); + INDArray output = nullsafe(cg.output(input1, input2, input3)[0]); Assert.assertEquals(0.0, mse(output, expect), this.epsilon); @@ -350,7 +350,7 @@ public void testElementWiseVertexFullProduct() { int midsz = 13; int outputsz = 11; ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) - .biasInit(0.0).updater(Updater.SGD) + .biasInit(0.0).updater(new Sgd()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("input1", "input2", "input3") .addLayer("dense1", @@ -415,7 +415,7 @@ public void testElementWiseVertexFullProduct() { expect.addi(Transforms.sigmoid(middle.mmul(output_W).addi(output_b.repmat(batchsz, 1)))); - INDArray output = nullsafe(cg.getOutputLayer(0).activate()); + INDArray output = nullsafe(cg.output(input1, input2, input3)[0]); Assert.assertEquals(0.0, mse(output, expect), this.epsilon); @@ -524,7 +524,7 @@ public void testElementWiseVertexFullSubtract() { int midsz = 13; int outputsz = 11; ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) - .biasInit(0.0).updater(Updater.SGD) + .biasInit(0.0).updater(new Sgd()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("input1", "input2") .addLayer("dense1", @@ -579,7 +579,7 @@ public void testElementWiseVertexFullSubtract() { expect.addi(Transforms.sigmoid(middle.mmul(output_W).addi(output_b.repmat(batchsz, 1)))); - INDArray output = nullsafe(cg.getOutputLayer(0).activate()); + INDArray output = nullsafe(cg.output(input1, input2)[0]); Assert.assertEquals(0.0, mse(output, expect), this.epsilon); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java index 57cb536d4f3e..27cef6c1ee15 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java @@ -1,10 +1,8 @@ package org.deeplearning4j.nn.conf.graph; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.ActivationLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -19,7 +17,9 @@ import org.nd4j.linalg.activations.impl.ActivationTanH; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; +import org.nd4j.linalg.primitives.Pair; import java.util.Map; import java.util.TreeMap; @@ -121,7 +121,7 
@@ public void testComprehensive() { {0.55, 0.60, 0.65, 0.70, 0.75}, {0.80, 0.85, 0.90, 0.95, 0.99}}); ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) - .learningRate(0.01).updater(Updater.SGD) + .updater(new Sgd(0.01)) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("input") .addLayer("denselayer", diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java index 61d8944450c6..bc3b272cb8cf 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java @@ -2,9 +2,9 @@ import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.layers.RBM.HiddenUnit; import org.deeplearning4j.nn.conf.layers.RBM.VisibleUnit; import org.deeplearning4j.nn.weights.WeightInit; @@ -14,6 +14,8 @@ import org.nd4j.linalg.activations.impl.ActivationSoftmax; import org.nd4j.linalg.activations.impl.ActivationTanH; import org.nd4j.linalg.convolution.Convolution; +import org.nd4j.linalg.learning.config.AdaGrad; +import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; import java.io.*; @@ -45,7 +47,7 @@ public class LayerBuilderTest { double corruptionLevel = 0.5; Distribution dist = new NormalDistribution(1.0, 0.1); double dropOut = 0.1; - Updater updater = Updater.ADAGRAD; + IUpdater updater = new AdaGrad(); GradientNormalization gradNorm = GradientNormalization.ClipL2PerParamType; double gradNormThreshold = 8; @@ -60,8 +62,8 @@ public void testLayer() throws Exception { assertEquals(act, layer.getActivationFn()); assertEquals(weight, layer.getWeightInit()); assertEquals(dist, layer.getDist()); - assertEquals(dropOut, layer.getDropOut(), DELTA); - assertEquals(updater, layer.getUpdater()); + assertEquals(new Dropout(dropOut), layer.getIDropout()); + assertEquals(updater, layer.getIUpdater()); assertEquals(gradNorm, layer.getGradientNormalization()); assertEquals(gradNormThreshold, layer.getGradientNormalizationThreshold(), 0.0); } @@ -226,7 +228,7 @@ private void checkSerialization(Layer layer) throws Exception { assertEquals("unequal YAML serialization", confExpected.getLayer(), confActual.getLayer()); // check the layer's use of callSuper on equals method - confActual.getLayer().setDropOut(new java.util.Random().nextDouble()); + confActual.getLayer().setIDropout(new Dropout(new java.util.Random().nextDouble())); assertNotEquals("broken equals method (missing callSuper?)", confExpected.getLayer(), confActual.getLayer()); } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java index 58faa4dec172..d20544571175 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java @@ -1,8 +1,11 @@ package 
org.deeplearning4j.nn.conf.layers; -import org.deeplearning4j.nn.conf.*; +import org.deeplearning4j.nn.conf.GradientNormalization; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.junit.Test; @@ -11,11 +14,14 @@ import org.nd4j.linalg.learning.config.Adam; import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.learning.config.RmsProp; +import org.nd4j.linalg.schedule.MapSchedule; +import org.nd4j.linalg.schedule.ScheduleType; import java.util.HashMap; import java.util.Map; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; public class LayerConfigTest { @@ -101,6 +107,7 @@ public void testWeightBiasInitLayerwiseOverride() { assertEquals(0, ((BaseLayer) conf.getConf(1).getLayer()).getBiasInit(), 0.0); } + /* @Test public void testLrL1L2LayerwiseOverride() { //Idea: Set some common values for all layers. Then selectively override @@ -128,7 +135,7 @@ public void testLrL1L2LayerwiseOverride() { assertEquals(0.2, ((BaseLayer) conf.getConf(1).getLayer()).getLearningRate(), 0.0); //L1 and L2 without layerwise override: - conf = new NeuralNetConfiguration.Builder().regularization(true).l1(0.1).l2(0.2).list() + conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.2).list() .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); net = new MultiLayerNetwork(conf); @@ -140,7 +147,7 @@ public void testLrL1L2LayerwiseOverride() { assertEquals(0.2, ((BaseLayer) conf.getConf(1).getLayer()).getL2(), 0.0); //L1 and L2 with layerwise override: - conf = new NeuralNetConfiguration.Builder().regularization(true).l1(0.1).l2(0.2).list() + conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.2).list() .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l1(0.9).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.8).build()).build(); net = new MultiLayerNetwork(conf); @@ -150,7 +157,7 @@ public void testLrL1L2LayerwiseOverride() { assertEquals(0.1, ((BaseLayer) conf.getConf(1).getLayer()).getL1(), 0.0); assertEquals(0.2, ((BaseLayer) conf.getConf(0).getLayer()).getL2(), 0.0); assertEquals(0.8, ((BaseLayer) conf.getConf(1).getLayer()).getL2(), 0.0); - } + }*/ @@ -162,8 +169,8 @@ public void testDropoutLayerwiseOverride() { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(1.0, conf.getConf(0).getLayer().getDropOut(), 0.0); - assertEquals(1.0, conf.getConf(1).getLayer().getDropOut(), 0.0); + assertEquals(new Dropout(1.0), conf.getConf(0).getLayer().getIDropout()); + assertEquals(new Dropout(1.0), conf.getConf(1).getLayer().getIDropout()); conf = new NeuralNetConfiguration.Builder().dropOut(1.0).list() .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) @@ -172,8 +179,8 @@ public void testDropoutLayerwiseOverride() { net = new MultiLayerNetwork(conf); net.init(); - assertEquals(1.0, conf.getConf(0).getLayer().getDropOut(), 0.0); - assertEquals(2.0, conf.getConf(1).getLayer().getDropOut(), 0.0); + assertEquals(new Dropout(1.0), conf.getConf(0).getLayer().getIDropout()); + assertEquals(new Dropout(2.0), conf.getConf(1).getLayer().getIDropout()); } @Test @@ -181,68 +188,55 
@@ public void testMomentumLayerwiseOverride() { Map testMomentumAfter = new HashMap<>(); testMomentumAfter.put(0, 0.1); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NESTEROVS).momentum(1.0) - .momentumAfter(testMomentumAfter).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter))) + .list() .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(1.0, ((BaseLayer) conf.getConf(0).getLayer()).getMomentum(), 0.0); - assertEquals(1.0, ((BaseLayer) conf.getConf(1).getLayer()).getMomentum(), 0.0); - assertEquals(0.1, ((BaseLayer) conf.getConf(0).getLayer()).getMomentumSchedule().get(0), 0.0); - assertEquals(0.1, ((BaseLayer) conf.getConf(1).getLayer()).getMomentumSchedule().get(0), 0.0); + assertEquals(0.1, ((Nesterovs)((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0); + assertEquals(0.1, ((Nesterovs)((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0); Map testMomentumAfter2 = new HashMap<>(); testMomentumAfter2.put(0, 0.2); - conf = new NeuralNetConfiguration.Builder().updater(Updater.NESTEROVS).momentum(1.0) - .momentumAfter(testMomentumAfter).list() + conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter) )) + .list() .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()).layer(1, new DenseLayer.Builder() - .nIn(2).nOut(2).momentum(2.0).momentumAfter(testMomentumAfter2).build()) + .nIn(2).nOut(2).updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter2))).build()) .build(); net = new MultiLayerNetwork(conf); net.init(); - - assertEquals(1.0, ((BaseLayer) conf.getConf(0).getLayer()).getMomentum(), 0.0); - assertEquals(2.0, ((BaseLayer) conf.getConf(1).getLayer()).getMomentum(), 0.0); - assertEquals(1.0, ((Nesterovs) ((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getMomentum(), 0.0); - assertEquals(2.0, ((Nesterovs) ((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getMomentum(), 0.0); - assertEquals(0.1, ((BaseLayer) conf.getConf(0).getLayer()).getMomentumSchedule().get(0), 0.0); - assertEquals(0.2, ((BaseLayer) conf.getConf(1).getLayer()).getMomentumSchedule().get(0), 0.0); - assertEquals(0.1, ((Nesterovs) ((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getMomentumSchedule() - .get(0), 0.0); - assertEquals(0.2, ((Nesterovs) ((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getMomentumSchedule() - .get(0), 0.0); - + assertEquals(0.1, ((Nesterovs)((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0); + assertEquals(0.2, ((Nesterovs)((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0); } @Test public void testUpdaterRhoRmsDecayLayerwiseOverride() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.ADADELTA).rho(0.5).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new AdaDelta(0.5, 0.9)).list() .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).rho(0.01).build()).build(); + .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new 
AdaDelta(0.01,0.9)).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals("ADADELTA", ((BaseLayer) conf.getConf(0).getLayer()).getUpdater().toString()); - assertEquals("ADADELTA", ((BaseLayer) conf.getConf(1).getLayer()).getUpdater().toString()); - assertEquals(0.5, ((BaseLayer) conf.getConf(0).getLayer()).getRho(), 0.0); - assertEquals(0.01, ((BaseLayer) conf.getConf(1).getLayer()).getRho(), 0.0); + assertTrue(((BaseLayer) conf.getConf(0).getLayer()).getIUpdater() instanceof AdaDelta); + assertTrue(((BaseLayer) conf.getConf(1).getLayer()).getIUpdater() instanceof AdaDelta); + assertEquals(0.5, ((AdaDelta)((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getRho(), 0.0); + assertEquals(0.01, ((AdaDelta)((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getRho(), 0.0); - conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP).rmsDecay(2.0).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).rmsDecay(1.0).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(Updater.ADADELTA).rho(0.5).build()) + conf = new NeuralNetConfiguration.Builder().updater(new RmsProp(1.0, 2.0, RmsProp.DEFAULT_RMSPROP_EPSILON)).list() + .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).updater(new RmsProp(1.0, 1.0, RmsProp.DEFAULT_RMSPROP_EPSILON)).build()) + .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new AdaDelta.Builder().rho(0.5).build()).build()) .build(); net = new MultiLayerNetwork(conf); net.init(); - assertEquals("RMSPROP", ((BaseLayer) conf.getConf(0).getLayer()).getUpdater().toString()); - assertEquals("ADADELTA", ((BaseLayer) conf.getConf(1).getLayer()).getUpdater().toString()); - assertEquals(1.0, ((BaseLayer) conf.getConf(0).getLayer()).getRmsDecay(), 0.0); - assertEquals(0.5, ((BaseLayer) conf.getConf(1).getLayer()).getRho(), 0.0); + assertTrue(((BaseLayer) conf.getConf(0).getLayer()).getIUpdater() instanceof RmsProp); + assertTrue(((BaseLayer) conf.getConf(1).getLayer()).getIUpdater() instanceof AdaDelta); assertEquals(1.0, ((RmsProp) ((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getRmsDecay(), 0.0); assertEquals(0.5, ((AdaDelta) ((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getRho(), 0.0); } @@ -250,20 +244,15 @@ public void testUpdaterRhoRmsDecayLayerwiseOverride() { @Test public void testUpdaterAdamParamsLayerwiseOverride() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.ADAM).adamMeanDecay(0.5) - .adamVarDecay(0.5).list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).adamMeanDecay(0.6).adamVarDecay(0.7).build()) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new Adam(1.0, 0.5, 0.5, 1e-8)) + .list() + .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) + .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new Adam(1.0, 0.6, 0.7, 1e-8)).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals("ADAM", ((BaseLayer) conf.getConf(0).getLayer()).getUpdater().toString()); - assertEquals("ADAM", ((BaseLayer) conf.getConf(1).getLayer()).getUpdater().toString(), "ADAM"); - assertEquals(0.5, ((BaseLayer) conf.getConf(0).getLayer()).getAdamMeanDecay(), 0.0); - assertEquals(0.6, ((BaseLayer) conf.getConf(1).getLayer()).getAdamMeanDecay(), 0.0); - assertEquals(0.5, ((BaseLayer) conf.getConf(0).getLayer()).getAdamVarDecay(), 0.0); - assertEquals(0.7, ((BaseLayer) 
conf.getConf(1).getLayer()).getAdamVarDecay(), 0.0); - assertEquals(0.5, ((Adam) ((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getBeta1(), 0.0); assertEquals(0.6, ((Adam) ((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getBeta1(), 0.0); assertEquals(0.5, ((Adam) ((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getBeta2(), 0.0); @@ -309,28 +298,15 @@ public void testGradientNormalizationLayerwiseOverride() { assertEquals(2.5, ((BaseLayer) conf.getConf(1).getLayer()).getGradientNormalizationThreshold(), 0.0); } - @Test - public void testLearningRatePolicyNone() { - double lr = 2; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr) - .learningRateDecayPolicy(LearningRatePolicy.None).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - assertEquals(LearningRatePolicy.None, conf.getConf(0).getLearningRatePolicy()); - assertEquals(LearningRatePolicy.None, conf.getConf(1).getLearningRatePolicy()); - - } - + /* @Test public void testLearningRatePolicyExponential() { double lr = 2; double lrDecayRate = 5; int iterations = 1; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr) + .updater(Updater.SGD) .learningRateDecayPolicy(LearningRatePolicy.Exponential).lrPolicyDecayRate(lrDecayRate).list() .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); @@ -428,5 +404,5 @@ public void testLearningRatePolicySigmoid() { assertEquals(steps, conf.getConf(1).getLrPolicySteps(), 0.0); } - +*/ } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java index e813512d023f..7ce1787c6e06 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java @@ -1,16 +1,26 @@ package org.deeplearning4j.nn.conf.layers; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.*; +import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.distribution.GaussianDistribution; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.weightnoise.DropConnect; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.junit.Ignore; import org.junit.Test; import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.learning.config.Adam; +import org.nd4j.linalg.learning.config.Nesterovs; +import org.nd4j.linalg.learning.config.RmsProp; +import org.nd4j.linalg.learning.config.Sgd; +import org.nd4j.linalg.schedule.MapSchedule; +import org.nd4j.linalg.schedule.ScheduleType; import java.util.HashMap; import java.util.Map; @@ -23,7 +33,7 @@ public class 
LayerConfigValidationTest { @Test public void testDropConnect() { // Warning thrown only since some layers may not have l1 or l2 - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).useDropConnect(true) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)).weightNoise(new DropConnect(0.5)) .list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -34,7 +44,7 @@ public void testDropConnect() { @Test public void testL1L2NotSet() { // Warning thrown only since some layers may not have l1 or l2 - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).regularization(true) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.3)) .list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -44,7 +54,7 @@ public void testL1L2NotSet() { @Test(expected = IllegalStateException.class) @Ignore //Old assumption: throw exception on l1 but no regularization. Current design: warn, not exception public void testRegNotSetL1Global() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).l1(0.5).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.3)).l1(0.5).list() .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -54,7 +64,7 @@ public void testRegNotSetL1Global() { @Test(expected = IllegalStateException.class) @Ignore //Old assumption: throw exception on l1 but no regularization. 
Current design: warn, not exception public void testRegNotSetL2Local() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.3)).list() .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -65,7 +75,7 @@ public void testRegNotSetL2Local() { public void testWeightInitDistNotSet() { // Warning thrown only since global dist can be set with a different weight init locally MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(0.3).dist(new GaussianDistribution(1e-3, 2)) + new NeuralNetConfiguration.Builder().updater(new Sgd(0.3)).dist(new GaussianDistribution(1e-3, 2)) .list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -79,134 +89,18 @@ public void testNesterovsNotSetGlobal() { testMomentumAfter.put(0, 0.1); MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().momentum(1.0).momentumAfter(testMomentumAfter).list() + new NeuralNetConfiguration.Builder().updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter))).list() .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); } - @Test - public void testNesterovsNotSetLocalMomentum() { - // Warnings only thrown - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).momentum(0.3).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - } - - @Test - public void testNesterovsNotSetLocalMuAfter() { - // Warnings only thrown - Map testMomentumAfter = new HashMap<>(); - testMomentumAfter.put(0, 0.1); - - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2) - .momentumAfter(testMomentumAfter).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - } - - - @Test - public void testAdaDeltaValidation() { - // Warnings only thrown - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().rho(0.5).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).rho(0.01).build()).build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - } - - @Test - public void testRmsPropValidation() { - // Warnings only thrown - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().rmsDecay(2.0).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).rmsDecay(1.0).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(Updater.ADADELTA).rho(0.5).build()) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - } - - - @Test - public void testAdamValidation() { - // Warnings only thrown - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().adamMeanDecay(0.5).adamVarDecay(0.5).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new 
DenseLayer.Builder().nIn(2).nOut(2).adamMeanDecay(0.6).adamVarDecay(0.7).build()) - .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - } - - - @Test(expected = IllegalStateException.class) - public void testLRPolicyMissingDecayRate() { - double lr = 2; - double power = 3; - int iterations = 1; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr) - .learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyPower(power).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - } - - @Test(expected = IllegalStateException.class) - public void testLRPolicyMissingPower() { - double lr = 2; - double lrDecayRate = 5; - int iterations = 1; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr) - .learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(lrDecayRate).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - } - - @Test(expected = IllegalStateException.class) - public void testLRPolicyMissingSteps() { - double lr = 2; - double lrDecayRate = 5; - int iterations = 1; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr) - .learningRateDecayPolicy(LearningRatePolicy.Step).lrPolicyDecayRate(lrDecayRate).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - } - - @Test(expected = IllegalStateException.class) - public void testLRPolicyMissingSchedule() { - double lr = 2; - double lrDecayRate = 5; - int iterations = 1; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr) - .learningRateDecayPolicy(LearningRatePolicy.Schedule).lrPolicyDecayRate(lrDecayRate).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - } - @Test public void testCompGraphNullLayer() { ComputationGraphConfiguration.GraphBuilder gb = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.01) - .iterations(3).seed(42).miniBatch(false).l1(0.2).l2(0.2).rmsDecay(0.3).regularization(true) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.01)) + .iterations(3).seed(42).miniBatch(false).l1(0.2).l2(0.2) /* Graph Builder */ .updater(Updater.RMSPROP).graphBuilder().addInputs("in") .addLayer("L" + 1, @@ -236,22 +130,22 @@ public void testPredefinedConfigValues() { double expectedL2 = 0.0; // Nesterovs Updater - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).updater(Updater.NESTEROVS) - .regularization(true).list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).momentum(0.4).build()).build(); + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(0.9)) + .list().layer(0, new 
DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).build()) + .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new Nesterovs(0.3, 0.4)).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); BaseLayer layerConf = (BaseLayer) net.getLayer(0).conf().getLayer(); - assertEquals(expectedMomentum, layerConf.getMomentum(), 1e-3); + assertEquals(expectedMomentum, ((Nesterovs)layerConf.getIUpdater()).getMomentum(), 1e-3); assertEquals(expectedL1, layerConf.getL1(), 1e-3); assertEquals(0.5, layerConf.getL2(), 1e-3); BaseLayer layerConf1 = (BaseLayer) net.getLayer(1).conf().getLayer(); - assertEquals(0.4, layerConf1.getMomentum(), 1e-3); + assertEquals(0.4, ((Nesterovs)layerConf1.getIUpdater()).getMomentum(), 1e-3); // Adam Updater - conf = new NeuralNetConfiguration.Builder().learningRate(0.3).updater(Updater.ADAM).regularization(true) + conf = new NeuralNetConfiguration.Builder().updater(new Adam(0.3)) .weightInit(WeightInit.DISTRIBUTION).list() .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).l1(0.3).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); @@ -263,27 +157,27 @@ public void testPredefinedConfigValues() { assertEquals(0.5, layerConf.getL2(), 1e-3); layerConf1 = (BaseLayer) net.getLayer(1).conf().getLayer(); - assertEquals(expectedAdamMeanDecay, layerConf1.getAdamMeanDecay(), 1e-3); - assertEquals(expectedAdamVarDecay, layerConf1.getAdamVarDecay(), 1e-3); + assertEquals(expectedAdamMeanDecay, ((Adam)layerConf1.getIUpdater()).getBeta1(), 1e-3); + assertEquals(expectedAdamVarDecay, ((Adam)layerConf1.getIUpdater()).getBeta2(), 1e-3); assertEquals(expectedDist, layerConf1.getDist()); // l1 & l2 local should still be set whether regularization true or false assertEquals(expectedL1, layerConf1.getL1(), 1e-3); assertEquals(expectedL2, layerConf1.getL2(), 1e-3); //RMSProp Updater - conf = new NeuralNetConfiguration.Builder().learningRate(0.3).updater(Updater.RMSPROP).list() + conf = new NeuralNetConfiguration.Builder().updater(new RmsProp(0.3)).list() .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).rmsDecay(0.4).build()).build(); + .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new RmsProp(0.3, 0.4, RmsProp.DEFAULT_RMSPROP_EPSILON)).build()).build(); net = new MultiLayerNetwork(conf); net.init(); layerConf = (BaseLayer) net.getLayer(0).conf().getLayer(); - assertEquals(expectedRmsDecay, layerConf.getRmsDecay(), 1e-3); + assertEquals(expectedRmsDecay, ((RmsProp)layerConf.getIUpdater()).getRmsDecay(), 1e-3); assertEquals(expectedL1, layerConf.getL1(), 1e-3); assertEquals(expectedL2, layerConf.getL2(), 1e-3); layerConf1 = (BaseLayer) net.getLayer(1).conf().getLayer(); - assertEquals(0.4, layerConf1.getRmsDecay(), 1e-3); + assertEquals(0.4, ((RmsProp)layerConf1.getIUpdater()).getRmsDecay(), 1e-3); } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java index 95bbddaab72e..56295ae29640 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java @@ -44,7 +44,7 @@ public void testCustomPreprocessor() { //Second: let's create a MultiLayerCofiguration with one, and check JSON and YAML config actually works... 
MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(0.1).list() + new NeuralNetConfiguration.Builder().list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10) .nOut(10).build()) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java index b0b7d0f08640..d2575c02acf6 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java @@ -1,10 +1,8 @@ package org.deeplearning4j.nn.conf.preprocessor; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; import org.deeplearning4j.nn.conf.layers.GravesLSTM; @@ -432,27 +430,22 @@ public void testAutoAdditionOfPreprocessors() { @Test public void testCnnToDense() { MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() - //.gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .learningRate(0.01) // default - //.momentum(0.9) - .regularization(true) - .list().layer(0, - new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( - 4, 4) // 28*28*1 => 15*15*10 - .nIn(1).nOut(10).padding(2, 2) - .stride(2, 2) - .weightInit(WeightInit.RELU) - .activation(Activation.RELU) - .build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder() - .activation(Activation.RELU).nOut(200).build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(200) - .nOut(5).weightInit(WeightInit.RELU) - .activation(Activation.SOFTMAX).updater(Updater.SGD).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)).backprop(true) - .pretrain(false).build(); + new NeuralNetConfiguration.Builder() + .list().layer(0, + new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( + 4, 4) // 28*28*1 => 15*15*10 + .nIn(1).nOut(10).padding(2, 2) + .stride(2, 2) + .weightInit(WeightInit.RELU) + .activation(Activation.RELU) + .build()) + .layer(1, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder() + .activation(Activation.RELU).nOut(200).build()) + .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(200) + .nOut(5).weightInit(WeightInit.RELU) + .activation(Activation.SOFTMAX).build()) + .setInputType(InputType.convolutionalFlat(28, 28, 1)).backprop(true) + .pretrain(false).build(); assertNotNull(conf.getInputPreProcess(0)); assertNotNull(conf.getInputPreProcess(1)); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/custom/MyCustomPreprocessor.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/custom/MyCustomPreprocessor.java index c024619ef2b7..611ad9434ffa 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/custom/MyCustomPreprocessor.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/custom/MyCustomPreprocessor.java @@ -1,11 +1,11 @@ 
package org.deeplearning4j.nn.conf.preprocessor.custom; import lombok.EqualsAndHashCode; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** * Created by Alex on 09/09/2016. diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java new file mode 100644 index 000000000000..041f8f29b70f --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java @@ -0,0 +1,250 @@ +package org.deeplearning4j.nn.conf.weightnoise; + +import lombok.AllArgsConstructor; +import lombok.Data; +import org.deeplearning4j.TestUtils; +import org.deeplearning4j.datasets.iterator.ExistingDataSetIterator; +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.graph.ComputationGraph; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.nn.weights.WeightInit; +import org.junit.Test; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.impl.accum.MatchCondition; +import org.nd4j.linalg.dataset.DataSet; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.indexing.conditions.Conditions; +import org.nd4j.linalg.schedule.ScheduleType; +import org.nd4j.linalg.schedule.SigmoidSchedule; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.*; + +public class TestWeightNoise { + + @Test + public void testWeightNoiseConfigJson() { + IWeightNoise[] weightNoises = new IWeightNoise[]{ + new DropConnect(0.5), + new DropConnect(new SigmoidSchedule(ScheduleType.ITERATION, 0.5, 0.5, 100)), + new WeightNoise(new NormalDistribution(0, 0.1)) + }; + + for (IWeightNoise wn : weightNoises) { + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .weightNoise(wn) + .list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) + .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(new DropConnect(0.25)).build()) + .layer(new OutputLayer.Builder().nIn(10).nOut(10).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + assertEquals(wn, ((BaseLayer) net.getLayer(0).conf().getLayer()).getWeightNoise()); + assertEquals(new DropConnect(0.25), ((BaseLayer) net.getLayer(1).conf().getLayer()).getWeightNoise()); + assertEquals(wn, ((BaseLayer) net.getLayer(2).conf().getLayer()).getWeightNoise()); + + TestUtils.testModelSerialization(net); + + + ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + .weightNoise(wn) + .graphBuilder() + .addInputs("in") + .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") + .layer("1", new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(new DropConnect(0.25)).build(), "0") + .layer("2", new OutputLayer.Builder().nIn(10).nOut(10).build(), "1") + 
.setOutputs("2") + .build(); + + ComputationGraph graph = new ComputationGraph(conf2); + graph.init(); + + assertEquals(wn, ((BaseLayer) graph.getLayer(0).conf().getLayer()).getWeightNoise()); + assertEquals(new DropConnect(0.25), ((BaseLayer) graph.getLayer(1).conf().getLayer()).getWeightNoise()); + assertEquals(wn, ((BaseLayer) graph.getLayer(2).conf().getLayer()).getWeightNoise()); + + TestUtils.testModelSerialization(graph); + } + } + + + @Test + public void testCalls() { + + List<DataSet> trainData = new ArrayList<>(); + trainData.add(new DataSet(Nd4j.rand(5, 10), Nd4j.rand(5, 10))); + trainData.add(new DataSet(Nd4j.rand(5, 10), Nd4j.rand(5, 10))); + trainData.add(new DataSet(Nd4j.rand(5, 10), Nd4j.rand(5, 10))); + + List<List<WeightNoiseCall>> expCalls = new ArrayList<>(); + for (int i = 0; i < 3; i++) { + List<WeightNoiseCall> expCallsForLayer = new ArrayList<>(); + expCallsForLayer.add(new WeightNoiseCall(i, "W", 0, 0, true)); + expCallsForLayer.add(new WeightNoiseCall(i, "b", 0, 0, true)); + expCallsForLayer.add(new WeightNoiseCall(i, "W", 1, 0, true)); + expCallsForLayer.add(new WeightNoiseCall(i, "b", 1, 0, true)); + expCallsForLayer.add(new WeightNoiseCall(i, "W", 2, 0, true)); + expCallsForLayer.add(new WeightNoiseCall(i, "b", 2, 0, true)); + expCallsForLayer.add(new WeightNoiseCall(i, "W", 3, 1, true)); + expCallsForLayer.add(new WeightNoiseCall(i, "b", 3, 1, true)); + expCallsForLayer.add(new WeightNoiseCall(i, "W", 4, 1, true)); + expCallsForLayer.add(new WeightNoiseCall(i, "b", 4, 1, true)); + expCallsForLayer.add(new WeightNoiseCall(i, "W", 5, 1, true)); + expCallsForLayer.add(new WeightNoiseCall(i, "b", 5, 1, true)); + + expCallsForLayer.add(new WeightNoiseCall(i, "W", 5, 1, false)); + expCallsForLayer.add(new WeightNoiseCall(i, "b", 5, 1, false)); + + expCalls.add(expCallsForLayer); + } + + + CustomWeightNoise wn1 = new CustomWeightNoise(); + CustomWeightNoise wn2 = new CustomWeightNoise(); + CustomWeightNoise wn3 = new CustomWeightNoise(); + + List<CustomWeightNoise> list = Arrays.asList(wn1, wn2, wn3); + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn1).build()) + .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn2).build()) + .layer(new OutputLayer.Builder().nIn(10).nOut(10).weightNoise(wn3).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + net.fit(new ExistingDataSetIterator(trainData.iterator())); + net.fit(new ExistingDataSetIterator(trainData.iterator())); + net.output(trainData.get(0).getFeatures()); + + for (int i = 0; i < 3; i++) { + assertEquals(expCalls.get(i), list.get(i).getAllCalls()); + } + + + wn1 = new CustomWeightNoise(); + wn2 = new CustomWeightNoise(); + wn3 = new CustomWeightNoise(); + list = Arrays.asList(wn1, wn2, wn3); + + ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + .graphBuilder() + .addInputs("in") + .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn1).build(), "in") + .layer("1", new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn2).build(), "0") + .layer("2", new OutputLayer.Builder().nIn(10).nOut(10).weightNoise(wn3).build(), "1") + .setOutputs("2") + .build(); + + ComputationGraph graph = new ComputationGraph(conf2); + graph.init(); + + int[] layerIdxs = new int[]{graph.getLayer(0).getIndex(), graph.getLayer(1).getIndex(), graph.getLayer(2).getIndex()}; + + expCalls.clear(); + for (int i = 0; i < 3; i++) { + List<WeightNoiseCall> expCallsForLayer = new ArrayList<>(); + expCallsForLayer.add(new
WeightNoiseCall(layerIdxs[i], "W", 0, 0, true)); + expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 0, 0, true)); + expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "W", 1, 0, true)); + expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 1, 0, true)); + expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "W", 2, 0, true)); + expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 2, 0, true)); + expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "W", 3, 1, true)); + expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 3, 1, true)); + expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "W", 4, 1, true)); + expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 4, 1, true)); + expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "W", 5, 1, true)); + expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 5, 1, true)); + + expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "W", 5, 1, false)); + expCallsForLayer.add(new WeightNoiseCall(layerIdxs[i], "b", 5, 1, false)); + + expCalls.add(expCallsForLayer); + } + + graph.fit(new ExistingDataSetIterator(trainData.iterator())); + graph.fit(new ExistingDataSetIterator(trainData.iterator())); + graph.output(trainData.get(0).getFeatures()); + + for (int i = 0; i < 3; i++) { + assertEquals(String.valueOf(i), expCalls.get(i), list.get(i).getAllCalls()); + } + + } + + @Data + private static class CustomWeightNoise implements IWeightNoise { + + private List<WeightNoiseCall> allCalls = new ArrayList<>(); + + @Override + public INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train) { + allCalls.add(new WeightNoiseCall(layer.getIndex(), paramKey, iteration, epoch, train)); + return layer.getParam(paramKey); + } + + @Override + public IWeightNoise clone() { + return new CustomWeightNoise(); + } + } + + @AllArgsConstructor + @Data + private static class WeightNoiseCall { + private int layerIdx; + private String paramKey; + private int iter; + private int epoch; + private boolean train; + } + + + @Test + public void testDropConnectValues() { + Nd4j.getRandom().setSeed(12345); + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .weightInit(WeightInit.ONES) + .list() + .layer(new OutputLayer.Builder().nIn(10).nOut(10).build()) + .build(); + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + Layer l = net.getLayer(0); + DropConnect d = new DropConnect(0.5); + + INDArray outTest = d.getParameter(l, "W", 0, 0, false); + assertTrue(l.getParam("W") == outTest); //Should be same object + INDArray outTrain = d.getParameter(l, "W", 0, 0, true); + assertNotEquals(l.getParam("W"), outTrain); + + assertEquals(l.getParam("W"), Nd4j.ones(10, 10)); + + int countZeros = Nd4j.getExecutioner().exec(new MatchCondition(outTrain, Conditions.equals(0))).z().getInt(0); + int countOnes = Nd4j.getExecutioner().exec(new MatchCondition(outTrain, Conditions.equals(1))).z().getInt(0); + + assertEquals(100, countZeros + countOnes); //Values should only be 0 or 1 + //Stochastic, but this should hold for most cases + assertTrue(countZeros >= 25 && countZeros <= 75); + assertTrue(countOnes >= 25 && countOnes <= 75); + } + +} diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java index c8b60f2b8ac4..0251f3e14742 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java +++ 
b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java @@ -1,6 +1,5 @@ package org.deeplearning4j.nn.graph; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.datasets.iterator.IteratorDataSetIterator; import org.deeplearning4j.datasets.iterator.IteratorMultiDataSetIterator; import org.deeplearning4j.nn.api.Layer; @@ -27,6 +26,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.primitives.Pair; import java.util.Collections; import java.util.Map; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java index 82334044749a..3317b7b7a56e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java @@ -1,6 +1,5 @@ package org.deeplearning4j.nn.graph; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.datasets.iterator.impl.ListDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; @@ -18,6 +17,7 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.util.ArrayList; import java.util.Arrays; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java index 1855faf4e4b4..7c4e391a401d 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java @@ -16,6 +16,7 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.preprocessor.*; +import org.deeplearning4j.nn.conf.weightnoise.DropConnect; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -32,6 +33,8 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.io.ClassPathResource; +import org.nd4j.linalg.learning.config.AdaGrad; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.primitives.Pair; @@ -247,7 +250,7 @@ public void testIrisFitMultiDataSetIterator() throws Exception { .addInput("iris", 0, 3).addOutputOneHot("iris", 4, 3).build(); ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.1) + .updater(new Sgd(0.1)) .graphBuilder().addInputs("in") .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) @@ -499,7 +502,7 @@ public void testPreTraining() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().iterations(100) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .iterations(1).updater(Updater.SGD).learningRate(1e-6).regularization(true) + 
.iterations(1).updater(new Sgd(1e-6)) .l2(2e-4).graphBuilder().addInputs("in") .addLayer("layer0", new RBM.Builder(RBM.HiddenUnit.GAUSSIAN, @@ -554,8 +557,9 @@ public void testScoreExamples() { int nIn = 5; int nOut = 6; ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l1(0.01).l2(0.01) - .learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER) + new NeuralNetConfiguration.Builder().seed(12345).l1(0.01).l2(0.01) + .updater(new Sgd(0.1)) + .activation(Activation.TANH).weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(20).build(), "in") .addLayer("1", new DenseLayer.Builder().nIn(20).nOut(30).build(), "0") @@ -565,7 +569,7 @@ public void testScoreExamples() { .setOutputs("2").build(); ComputationGraphConfiguration confNoReg = - new NeuralNetConfiguration.Builder().seed(12345).learningRate(0.1).activation(Activation.TANH) + new NeuralNetConfiguration.Builder().seed(12345).updater(new Sgd(0.1)).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(20).build(), "in") .addLayer("1", new DenseLayer.Builder().nIn(20).nOut(30).build(), "0") @@ -619,8 +623,8 @@ public void testExternalErrors() { INDArray outData = Nd4j.rand(3, 10); Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration standard = new NeuralNetConfiguration.Builder().learningRate(0.1) - .updater(Updater.SGD).seed(12345).graphBuilder().addInputs("in") + ComputationGraphConfiguration standard = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + .seed(12345).graphBuilder().addInputs("in") .addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") .addLayer("out", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10) .nOut(10).build(), "l0") @@ -630,8 +634,8 @@ public void testExternalErrors() { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration external = new NeuralNetConfiguration.Builder().learningRate(0.1) - .updater(Updater.SGD).seed(12345).graphBuilder().addInputs("in") + ComputationGraphConfiguration external = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + .seed(12345).graphBuilder().addInputs("in") .addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").setOutputs("l0") .pretrain(false).backprop(true).build(); @@ -903,9 +907,8 @@ public void testIterationCountAndPresistence() throws IOException { @Test public void printSummary() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1) - .activation(Activation.IDENTITY) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD); + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + .activation(Activation.IDENTITY); ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight") .addLayer("denseCentre0", new DenseLayer.Builder().nIn(10).nOut(9).build(), "inCentre") @@ -982,7 +985,7 @@ public void testSetOutputsMultipleCalls() { public void testDropoutValidation() { //At one point: this threw an exception due to incorrect validation for (boolean dropConnect : new boolean[]{false, true}) { - new NeuralNetConfiguration.Builder().regularization(true).useDropConnect(dropConnect).dropOut(0.5) + new NeuralNetConfiguration.Builder().weightNoise(new DropConnect(0.5)) 
.graphBuilder().setInputTypes(InputType.feedForward(1)).addInputs("input1") .addLayer("output", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(1).nOut(1) @@ -998,7 +1001,7 @@ public void testNoParamLayersL1L2() { //Don't care about this being valid ComputationGraphConfiguration c = - new NeuralNetConfiguration.Builder().regularization(true).l1(0.5).l2(0.6).graphBuilder() + new NeuralNetConfiguration.Builder().l1(0.5).l2(0.6).graphBuilder() .addInputs("in") .addLayer("sub1", new SubsamplingLayer.Builder(2, 2).build(), "in") .addLayer("sub2", new Subsampling1DLayer.Builder(2).build(), "sub1") @@ -1147,35 +1150,29 @@ public void testSummary() { int V_HEIGHT = 130; int V_NFRAMES = 150; ComputationGraphConfiguration confForArchitecture = - new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l2(0.001) //l2 regularization on all layers - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .iterations(1).learningRate(0.4).graphBuilder() + new NeuralNetConfiguration.Builder().seed(12345).l2(0.001) //l2 regularization on all layers + .updater(new AdaGrad(0.4)).graphBuilder() .addInputs("in") .addLayer("layer0", new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB .nOut(30).stride(4, 4).activation(Activation.RELU).weightInit( - WeightInit.RELU) - .updater(Updater.ADAGRAD).build(),"in") //Output: (130-10+0)/4+1 = 31 -> 31*31*30 + WeightInit.RELU).build(),"in") //Output: (130-10+0)/4+1 = 31 -> 31*31*30 .addLayer("layer1", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(3, 3).stride(2, 2).build(),"layer0") //(31-3+0)/2+1 = 15 .addLayer("layer2", new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2) .activation(Activation.RELU).weightInit(WeightInit.RELU) .updater(Updater.ADAGRAD).build(), "layer1") //Output: (15-3+0)/2+1 = 7 -> 7*7*10 = 490 .addLayer("layer3", new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50) - .weightInit(WeightInit.RELU).updater(Updater.ADAGRAD) - .gradientNormalization( - GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(10).learningRate(0.5).build(), "layer2") + .weightInit(WeightInit.RELU).gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) + .gradientNormalizationThreshold(10).build(), "layer2") .addLayer("layer4", new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50) .nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD) - .gradientNormalization( - GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(10).learningRate(0.6) + .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) + .gradientNormalizationThreshold(10) .build(), "layer3") .addLayer("layer5", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(50).nOut(4) //4 possible shapes: circle, square, arc, line - .updater(Updater.ADAGRAD).weightInit(WeightInit.XAVIER) - .gradientNormalization( - GradientNormalization.ClipElementWiseAbsoluteValue) + .weightInit(WeightInit.XAVIER) + .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10).build(), "layer4") .setOutputs("layer5") .inputPreProcessor("layer0", new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3)) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java index 0d9a59f06f80..676aff406dfd 100644 
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java @@ -3,7 +3,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.GravesLSTM; @@ -18,6 +17,8 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.NoOp; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.Map; @@ -45,7 +46,7 @@ public void testVariableLengthSimple() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(0.1).seed(12345).graphBuilder().addInputs("in") + .updater(new Sgd(0.1)).seed(12345).graphBuilder().addInputs("in") .addLayer("0", new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "in") .addLayer("1", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE) @@ -134,7 +135,7 @@ public void testInputMasking() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(0.1).seed(12345).graphBuilder().addInputs("in") + .updater(new Sgd(0.1)).seed(12345).graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "in") .addLayer("1", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), @@ -270,21 +271,21 @@ public void testOutputMaskingScoreMagnitudes() { INDArray labels = Nd4j.ones(miniBatch, nOut, tsLength); ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder().regularization(false).seed(12345L) + new NeuralNetConfiguration.Builder().seed(12345L) .graphBuilder() .addInputs("in").addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5) .weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).build(), + .updater(new NoOp()).build(), "in") .addLayer("1", new RnnOutputLayer.Builder( LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(5).nOut(nOut) .weightInit(WeightInit.ZERO) - .updater(Updater.NONE).build(), + .updater(new NoOp()).build(), "0") .setOutputs("1").pretrain(false).backprop(true).build(); ComputationGraph net = new ComputationGraph(conf); @@ -338,42 +339,42 @@ public void testOutputMasking() { INDArray input = Nd4j.rand(new int[] {miniBatch, nIn, tsLength}); ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder().regularization(false).seed(12345L) + new NeuralNetConfiguration.Builder().seed(12345L) .graphBuilder() .addInputs("in").addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5) .weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).build(), + .updater(new NoOp()).build(), "in") .addLayer("1", new RnnOutputLayer.Builder( LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(5).nOut(nOut) 
.weightInit(WeightInit.XAVIER) - .updater(Updater.NONE).build(), + .updater(new NoOp()).build(), "0") .setOutputs("1").pretrain(false).backprop(true).build(); ComputationGraph net = new ComputationGraph(conf); net.init(); ComputationGraphConfiguration conf2 = - new NeuralNetConfiguration.Builder().regularization(false).seed(12345L) + new NeuralNetConfiguration.Builder().seed(12345L) .graphBuilder() .addInputs("in").addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5) .weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).build(), + .updater(new NoOp()).build(), "in") .addLayer("1", new RnnOutputLayer.Builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .nIn(5).nOut(nOut) .weightInit(WeightInit.XAVIER) - .updater(Updater.NONE).build(), + .updater(new NoOp()).build(), "0") .setOutputs("1").pretrain(false).backprop(true).build(); ComputationGraph net2 = new ComputationGraph(conf2); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java index e6723ddf6592..51c4bd86cd1c 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java @@ -1,27 +1,25 @@ package org.deeplearning4j.nn.graph.graphnodes; -import org.deeplearning4j.nn.conf.WorkspaceMode; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.GravesLSTM; -import org.deeplearning4j.nn.conf.layers.RnnOutputLayer; -import org.deeplearning4j.nn.transferlearning.TransferLearning; -import org.deeplearning4j.nn.weights.WeightInit; -import org.nd4j.linalg.learning.config.AdaDelta; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.conf.graph.ElementWiseVertex; import org.deeplearning4j.nn.conf.graph.PreprocessorVertex; import org.deeplearning4j.nn.conf.graph.rnn.DuplicateToTimeSeriesVertex; import org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex; +import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.EmbeddingLayer; +import org.deeplearning4j.nn.conf.layers.GravesLSTM; import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.conf.layers.RnnOutputLayer; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.GraphVertex; import org.deeplearning4j.nn.graph.vertex.impl.*; +import org.deeplearning4j.nn.transferlearning.TransferLearning; +import org.deeplearning4j.nn.weights.WeightInit; import org.junit.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; @@ -29,14 +27,14 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.AdaDelta; import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; import java.util.Map; -import static org.junit.Assert.assertArrayEquals; -import static 
org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.*; public class TestGraphNodes { diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java index e0192b87edb9..3ce8938d6930 100755 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java @@ -22,7 +22,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer; @@ -38,6 +37,8 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Nesterovs; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; import org.slf4j.Logger; @@ -59,8 +60,8 @@ private ComputationGraph getGraph(int numLabels, double lambda) { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE) - .learningRate(1.0).graphBuilder().addInputs("input1") + .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(new NoOp()) + .graphBuilder().addInputs("input1") .addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.RELU).build(), "input1") .addLayer("lossLayer", new CenterLossOutputLayer.Builder() @@ -80,9 +81,9 @@ public ComputationGraph getCNNMnistConfig() { int outputNum = 10; // The number of possible outcomes ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).iterations(1) // Training iterations as above - .regularization(true).l2(0.0005).learningRate(0.01).weightInit(WeightInit.XAVIER) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NESTEROVS) - .momentum(0.9).graphBuilder().addInputs("input") + .l2(0.0005).weightInit(WeightInit.XAVIER) + .updater(new Nesterovs(0.01, 0.9)) + .graphBuilder().addInputs("input") .setInputTypes(InputType.convolutionalFlat(28, 28, 1)) .addLayer("0", new ConvolutionLayer.Builder(5, 5) //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java index 2e54bbc9bb5f..d934397e79e6 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java @@ -1,11 +1,9 @@ package org.deeplearning4j.nn.layers; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -17,6 +15,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.List; @@ -37,11 +36,10 @@ public class FrozenLayerTest { public void testFrozen() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY); - FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().learningRate(0.1).build(); + FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build(); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(overallConf.clone().list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) @@ -93,8 +91,7 @@ public void cloneMLNFrozen() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(overallConf.list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) @@ -146,8 +143,7 @@ public void cloneMLNFrozen() { public void testFrozenCompGraph() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY); ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") @@ -198,8 +194,7 @@ public void cloneCompGraphFrozen() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1) - 
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY); ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java index fc2cf66d43ef..b3aba35968c1 100755 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java @@ -23,7 +23,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.GravesLSTM; import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; @@ -42,6 +41,9 @@ import org.nd4j.linalg.dataset.SplitTestAndTrain; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.AdaGrad; +import org.nd4j.linalg.learning.config.NoOp; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; import org.slf4j.Logger; @@ -63,7 +65,7 @@ public class OutputLayerTest { public void testIris2() { NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(10) - .learningRate(1e-1) + .updater(new Sgd(1e-1)) .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(4).nOut(3) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -126,10 +128,10 @@ public void testWeightsDifferent() { Nd4j.MAX_SLICES_TO_PRINT = Integer.MAX_VALUE; NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).miniBatch(false).seed(123) - .iterations(1000).learningRate(1e-1) + .miniBatch(false).seed(123) + .iterations(1000).updater(new AdaGrad(1e-1)) .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(4).nOut(3) - .weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD) + .weightInit(WeightInit.XAVIER) .lossFunction(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .activation(Activation.SOFTMAX).build()) .build(); @@ -172,10 +174,10 @@ public void testBinary() { DataSet dataset = new DataSet(data, data2); NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123).iterations(200) - .learningRate(1e-2) + .seed(123).iterations(200) + .updater(new Sgd(1e-2)) .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(6).nOut(2) - .weightInit(WeightInit.ZERO).updater(Updater.SGD).activation(Activation.SOFTMAX) + .weightInit(WeightInit.ZERO).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).build()) .build(); @@ -194,7 +196,7 @@ public void testBinary() { @Test public void testIris() { NeuralNetConfiguration conf = new 
NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(5).learningRate(1e-1) + .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(5).updater(new Sgd(1e-1)) .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(4).nOut(3) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -229,7 +231,7 @@ public void testIris() { public void testSetParams() { NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(100) - .learningRate(1e-1) + .updater(new Sgd(1e-1)) .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(4).nOut(3) .weightInit(WeightInit.ZERO).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -267,11 +269,11 @@ public void testOutputLayersRnnForwardPass() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)).activation(Activation.TANH) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .inputPreProcessor(1, new RnnToFeedForwardPreProcessor()).build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); @@ -294,11 +296,11 @@ public void testOutputLayersRnnForwardPass() { MultiLayerConfiguration confRnn = new NeuralNetConfiguration.Builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)).activation(Activation.TANH) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .build(); MultiLayerNetwork mlnRnn = new MultiLayerNetwork(confRnn); @@ -354,11 +356,11 @@ public void testRnnOutputLayerIncEdgeCases() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .activation(Activation.TANH).updater(Updater.NONE).build()) + .activation(Activation.TANH).updater(new NoOp()).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .inputPreProcessor(1, new RnnToFeedForwardPreProcessor()).pretrain(false).backprop(true) .build(); @@ -371,11 +373,11 @@ public void testRnnOutputLayerIncEdgeCases() { MultiLayerConfiguration confRnn = new NeuralNetConfiguration.Builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - 
.activation(Activation.TANH).updater(Updater.NONE).build()) + .activation(Activation.TANH).updater(new NoOp()).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .pretrain(false).backprop(true).build(); MultiLayerNetwork mlnRnn = new MultiLayerNetwork(confRnn); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java index 615eb0907612..a860a20dcb9f 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java @@ -1,9 +1,7 @@ package org.deeplearning4j.nn.layers; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -16,6 +14,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.lang.reflect.Field; @@ -35,8 +34,8 @@ public void testDropoutSimple() throws Exception { int nOut = 8; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) - .iterations(1).regularization(true).dropOut(0.5).list() + .updater(new Sgd()) + .iterations(1).dropOut(0.5).list() .layer(0, new OutputLayer.Builder().activation(Activation.IDENTITY) .lossFunction(LossFunctions.LossFunction.MSE).nIn(nIn).nOut(nOut) .weightInit(WeightInit.XAVIER).build()) @@ -110,8 +109,7 @@ public void testDropoutMultiLayer() throws Exception { int nOut = 4; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) - .iterations(1).regularization(true).dropOut(0.5).learningRate(1e-9) + .iterations(1).dropOut(0.5).updater(new Sgd(1e-9)) .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(10, 11)) //Weight init to cause sigmoid saturation .list() .layer(0, new DenseLayer.Builder().activation(Activation.SIGMOID).nIn(nIn).nOut(layerSize) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java index 491cfabfd69f..36b91e2d8c1e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java @@ -62,7 +62,7 @@ public void testDenseToOutputLayer() { //setup the network MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) - .regularization(true).l1(1e-1).l2(2e-4).useDropConnect(true).dropOut(0.5).miniBatch(true) + 
.l1(1e-1).l2(2e-4).dropOut(0.5).miniBatch(true) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() .layer(0, new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java index 8597bc9d73b7..20cf2e83093e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java @@ -7,7 +7,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -29,6 +28,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; import static org.junit.Assert.*; @@ -49,7 +49,7 @@ public void before() { public void testTwdFirstLayer() throws Exception { MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(123).iterations(5) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4) - .regularization(true).momentum(0.9).updater(Updater.NESTEROVS).useDropConnect(true).dropOut(0.5) + .updater(new Nesterovs(0.9)).dropOut(0.5) .list().layer(0, new ConvolutionLayer.Builder(8, 8) //16 filters kernel size 8 stride 4 .stride(4, 4).nOut(16).dropOut(0.5) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java index 668dce641161..a5ce0522fa6e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java @@ -1,6 +1,5 @@ package org.deeplearning4j.nn.layers.convolution; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.GradientNormalization; @@ -20,6 +19,7 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java new file mode 100644 index 000000000000..f6fef11eaa5f --- /dev/null +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java @@ -0,0 +1,110 @@ +package org.deeplearning4j.nn.layers.convolution; + +import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.conf.GradientNormalization; +import 
org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.Upsampling1D; +import org.deeplearning4j.nn.gradient.Gradient; +import org.junit.Test; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.dataset.DataSet; +import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; + +import java.util.Arrays; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * @author Max Pumperla + */ +public class Upsampling1DTest { + + private int nExamples = 1; + private int depth = 20; + private int nChannelsIn = 1; + private int inputLength = 28; + private int size = 2; + private int outputLength = inputLength * size; + private INDArray epsilon = Nd4j.ones(nExamples, depth, outputLength); + + + @Test + public void testUpsampling1D() throws Exception { + + double[] outArray = new double[] {1., 1., 2., 2., 3., 3., 4., 4.}; + INDArray containedExpectedOut = Nd4j.create(outArray, new int[] {1, 1, 8}); + INDArray containedInput = getContainedData(); + INDArray input = getData(); + Layer layer = getUpsampling1DLayer(); + + INDArray containedOutput = layer.activate(containedInput); + assertTrue(Arrays.equals(containedExpectedOut.shape(), containedOutput.shape())); + assertEquals(containedExpectedOut, containedOutput); + + INDArray output = layer.activate(input); + assertTrue(Arrays.equals(new int[] {nExamples, nChannelsIn, outputLength}, + output.shape())); + assertEquals(nChannelsIn, output.size(1), 1e-4); + } + + + @Test + public void testUpsampling1DBackprop() throws Exception { + INDArray expectedContainedEpsilonInput = + Nd4j.create(new double[] {1., 3., 2., 6., 7., 2., 5., 5.}, + new int[] {1, 1, 8}); + + INDArray expectedContainedEpsilonResult = Nd4j.create(new double[] {4., 8., 9., 10.}, + new int[] {1, 1, 4}); + + INDArray input = getContainedData(); + + Layer layer = getUpsampling1DLayer(); + layer.activate(input); + + Pair<Gradient, INDArray> containedOutput = layer.backpropGradient(expectedContainedEpsilonInput); + + assertEquals(expectedContainedEpsilonResult, containedOutput.getSecond()); + assertEquals(null, containedOutput.getFirst().getGradientFor("W")); + assertEquals(expectedContainedEpsilonResult.shape().length, containedOutput.getSecond().shape().length); + + INDArray input2 = getData(); + layer.activate(input2); + int depth = input2.size(1); + + epsilon = Nd4j.ones(5, depth, outputLength); + + Pair<Gradient, INDArray> out = layer.backpropGradient(epsilon); + assertEquals(input.shape().length, out.getSecond().shape().length); + assertEquals(depth, out.getSecond().size(1)); + } + + + private Layer getUpsampling1DLayer() { + NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) + .layer(new Upsampling1D.Builder(size).build()).build(); + return conf.getLayer().instantiate(conf, null, 0, + null, true); + } + + public INDArray getData() throws Exception { + DataSetIterator data = new MnistDataSetIterator(5, 5); + DataSet mnist = data.next(); + nExamples = mnist.numExamples(); + INDArray features = mnist.getFeatureMatrix().reshape(nExamples, nChannelsIn, inputLength, inputLength); + return features.slice(0, 3); + } + + private INDArray getContainedData() { + INDArray ret = Nd4j.create + (new double[] {1., 2., 3., 4.}, + new int[] {1, 1, 4}); + return ret; + } + +} diff --git 
a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java index e3abe33f2252..2b04a974927e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java @@ -3,17 +3,10 @@ import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.OutputLayer; -import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; import org.deeplearning4j.nn.conf.layers.Upsampling2D; import org.deeplearning4j.nn.gradient.Gradient; -import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.nn.weights.WeightInit; import org.junit.Test; -import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java index da6b9fdae43a..17ae80244edb 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java @@ -7,6 +7,7 @@ import org.deeplearning4j.nn.layers.custom.testclasses.CustomActivation; import org.junit.Test; import org.nd4j.linalg.activations.IActivation; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.shade.jackson.databind.ObjectMapper; import org.nd4j.shade.jackson.databind.introspect.AnnotatedClass; @@ -43,7 +44,7 @@ public void testCustomActivationFn() { //Second: let's create a MultiLayerCofiguration with one, and check JSON and YAML config actually works... - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.1).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).activation(new CustomActivation()).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10).nOut(10).build()) .pretrain(false).backprop(true).build(); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java index 0712cb7830bf..446b9c7eb28e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java @@ -72,7 +72,7 @@ public void testJsonMultiLayerNetwork() { //Second: let's create a MultiLayerCofiguration with one, and check JSON and YAML config actually works... 
MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(0.1).list() + new NeuralNetConfiguration.Builder().list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new CustomLayer(3.14159)).layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -95,7 +95,7 @@ public void testJsonMultiLayerNetwork() { public void testJsonComputationGraph() { //ComputationGraph with a custom layer; check JSON and YAML config actually works... - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.1).graphBuilder() + ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder() .addInputs("in").addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") .addLayer("1", new CustomLayer(3.14159), "0").addLayer("2", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10).nOut(10) @@ -120,7 +120,7 @@ public void testJsonComputationGraph() { public void checkInitializationFF() { //Actually create a network with a custom layer; check initialization and forward pass - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.1).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() .layer(0, new DenseLayer.Builder().nIn(9).nOut(10).build()).layer(1, new CustomLayer(3.14159)) //hard-coded nIn/nOut of 10 .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10).nOut(11).build()) .pretrain(false).backprop(true).build(); @@ -161,7 +161,7 @@ public void testCustomOutputLayerMLN() { //Second: let's create a MultiLayerCofiguration with one, and check JSON and YAML config actually works... MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345).learningRate(0.1).list() + new NeuralNetConfiguration.Builder().seed(12345).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new CustomOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .nIn(10).nOut(10).build()) @@ -187,7 +187,7 @@ public void testCustomOutputLayerMLN() { //Fourth: compare to an equivalent standard output layer (should be identical) MultiLayerConfiguration conf2 = - new NeuralNetConfiguration.Builder().seed(12345).learningRate(0.1).weightInit(WeightInit.XAVIER) + new NeuralNetConfiguration.Builder().seed(12345).weightInit(WeightInit.XAVIER) .list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -212,7 +212,7 @@ public void testCustomOutputLayerMLN() { @Test public void testCustomOutputLayerCG() { //Create a ComputationGraphConfiguration with custom output layer, and check JSON and YAML config actually works... 
- ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).learningRate(0.1) + ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("1", new CustomOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10) @@ -239,7 +239,7 @@ public void testCustomOutputLayerCG() { assertTrue(net.getLayer(1) instanceof CustomOutputLayerImpl); //Fourth: compare to an equivalent standard output layer (should be identical) - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345).learningRate(0.1) + ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345) .graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("1", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10).nOut(10) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomActivation.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomActivation.java index 174f65348ddb..283068dbcfe0 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomActivation.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomActivation.java @@ -1,10 +1,10 @@ package org.deeplearning4j.nn.layers.custom.testclasses; import lombok.EqualsAndHashCode; -import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.activations.BaseActivationFunction; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** * Created by Alex on 19/12/2016. 
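For reference, the recurring change across these test configurations is the move away from the deprecated learningRate(double) and Updater enum methods to explicit IUpdater instances such as new Sgd(0.1), new AdaGrad(0.1), or new NoOp(). A minimal sketch of the new idiom, using the same builder API as the tests in this diff; the class name and layer sizes below are hypothetical, chosen only for illustration:

    import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.nd4j.linalg.learning.config.Sgd;
    import org.nd4j.linalg.lossfunctions.LossFunctions;

    public class UpdaterMigrationSketch {
        public static MultiLayerConfiguration sgdExample() {
            // SGD with learning rate 0.1, expressed as an IUpdater instance
            // instead of .updater(Updater.SGD).learningRate(0.1)
            return new NeuralNetConfiguration.Builder()
                    .seed(12345)
                    .updater(new Sgd(0.1))
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
                    .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                            .nIn(10).nOut(10).build())
                    .pretrain(false).backprop(true)
                    .build();
        }
    }

Layers inherit the global IUpdater unless they set their own, which is why several per-layer updater(Updater.SGD) calls are simply dropped in the hunks below rather than replaced.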
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoderTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoderTest.java index 4dc2f5e185f6..f7e38a25303e 100755 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoderTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoderTest.java @@ -31,6 +31,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.Arrays; @@ -59,8 +60,8 @@ public void testAutoEncoderBiasInit() { public void testAutoEncoder() throws Exception { MnistDataFetcher fetcher = new MnistDataFetcher(true); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().momentum(0.9f) - .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(1).learningRate(1e-1f) + NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(1).updater(new Sgd(0.1)) .layer(new org.deeplearning4j.nn.conf.layers.AutoEncoder.Builder().nIn(784).nOut(600) .corruptionLevel(0.6) .lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY).build()) @@ -88,9 +89,9 @@ public void testAutoEncoder() throws Exception { public void testBackProp() throws Exception { MnistDataFetcher fetcher = new MnistDataFetcher(true); // LayerFactory layerFactory = LayerFactories.getFactory(new org.deeplearning4j.nn.conf.layers.AutoEncoder()); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().momentum(0.9f) + NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(100) - .learningRate(1e-1f) + .updater(new Sgd(0.1)) .layer(new org.deeplearning4j.nn.conf.layers.AutoEncoder.Builder().nIn(784).nOut(600) .corruptionLevel(0.6) .lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY).build()) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java index b00daf2f9492..2d4e5587379d 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java @@ -15,6 +15,7 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import static org.junit.Assert.assertEquals; @@ -107,7 +108,7 @@ private static MultiLayerNetwork getDenseMLNConfig(boolean backprop, boolean pre long seed = 6; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) - .learningRate(1e-3).l1(0.3).regularization(true).l2(1e-3).list() + .updater(new Sgd(1e-3)).l1(0.3).l2(1e-3).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(numInputs).nOut(3) .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(3).nOut(2) diff --git 
a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java index 2e285f8b5a67..2174f5ff47f2 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java @@ -4,7 +4,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.EmbeddingLayer; import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; @@ -16,6 +15,7 @@ import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.List; @@ -244,7 +244,7 @@ public void testEmbeddingLayerWithMasking() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(0.1).seed(12345).list() + .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new EmbeddingLayer.Builder().hasBias(true).activation(Activation.TANH).nIn(numInputClasses) .nOut(5).build()) .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()) @@ -259,7 +259,7 @@ public void testEmbeddingLayerWithMasking() { MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(0.1).seed(12345).list() + .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5) .build()) .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBMTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBMTests.java index 72be254a3bfc..9ff3113c13af 100755 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBMTests.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBMTests.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.layers.feedforward.rbm; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.datasets.fetchers.IrisDataFetcher; import org.deeplearning4j.datasets.fetchers.MnistDataFetcher; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; @@ -26,7 +25,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.RBM.HiddenUnit; @@ -47,8 +45,11 @@ import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization; import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize; import 
org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -130,7 +131,7 @@ public void testMnist() throws Exception { Nd4j.ENFORCE_NUMERICAL_STABILITY = true; NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().iterations(30) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1e-1f) + .updater(new Sgd(0.1)) .layer(new org.deeplearning4j.nn.conf.layers.RBM.Builder().nIn(784).nOut(600) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(1, 1e-5)) .lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY).build()) @@ -355,7 +356,7 @@ private static RBM getRBMLayer(int nIn, int nOut, HiddenUnit hiddenUnit, Visible int learningRate) { org.deeplearning4j.nn.conf.layers.RBM layer = new org.deeplearning4j.nn.conf.layers.RBM.Builder(hiddenUnit, visibleUnit).nIn(nIn).nOut(nOut) - .learningRate(learningRate).lossFunction(lossFunctions).build(); + .updater(new Sgd(learningRate)).lossFunction(lossFunctions).build(); NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).seed(42).layer(layer).build(); @@ -368,7 +369,7 @@ private static RBM getRBMLayer(int nIn, int nOut, HiddenUnit hiddenUnit, Visible boolean pretrain, boolean initialize, int iterations, LossFunctions.LossFunction lossFunctions) { org.deeplearning4j.nn.conf.layers.RBM layer = new org.deeplearning4j.nn.conf.layers.RBM.Builder(hiddenUnit, visibleUnit).nIn(nIn).nOut(nOut) - .learningRate(1e-1f).lossFunction(lossFunctions).build(); + .updater(new Sgd(1e-1f)).lossFunction(lossFunctions).build(); NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).seed(42).layer(layer).build(); @@ -380,8 +381,8 @@ private static RBM getRBMLayer(int nIn, int nOut, HiddenUnit hiddenUnit, Visible private static MultiLayerNetwork getRBMMLNNet(boolean backprop, boolean pretrain, INDArray input, int nOut1, int nOut2, WeightInit weightInit) { MultiLayerConfiguration rbm = new NeuralNetConfiguration.Builder().seed(0xDEADBEEF).iterations(1000).biasInit(0) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NONE) - .epsilon(1).weightInit(weightInit) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new NoOp()) + .weightInit(weightInit) .list(new org.deeplearning4j.nn.conf.layers.RBM.Builder(HiddenUnit.BINARY, VisibleUnit.BINARY) .lossFunction(LossFunctions.LossFunction.MSE).nOut(nOut1).build(), new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( @@ -398,8 +399,8 @@ private static MultiLayerNetwork getRBMMLNNet(boolean backprop, boolean pretrain private static MultiLayerNetwork getMultiLayerRBMNet(boolean backprop, boolean pretrain, INDArray input, int nOut1, int nOut2, int nOut3, WeightInit weightInit) { MultiLayerConfiguration rbm = new NeuralNetConfiguration.Builder().seed(0xDEADBEEF).iterations(1000).biasInit(0) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NONE) - .epsilon(1).weightInit(weightInit) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new NoOp()) + .weightInit(weightInit) .list(new org.deeplearning4j.nn.conf.layers.RBM.Builder() .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).nOut(nOut1).build(), new 
org.deeplearning4j.nn.conf.layers.RBM.Builder() diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java index 88a991bd3ee6..a2e49453c654 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java @@ -1,6 +1,6 @@ package org.deeplearning4j.nn.layers.normalization; -import org.nd4j.linalg.primitives.Pair; +import org.deeplearning4j.TestUtils; import org.deeplearning4j.datasets.iterator.impl.ListDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.Layer; @@ -17,7 +17,6 @@ import org.deeplearning4j.nn.updater.MultiLayerUpdater; import org.deeplearning4j.nn.updater.UpdaterBlock; import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.util.ModelSerializer; import org.junit.Before; import org.junit.Test; import org.nd4j.linalg.activations.Activation; @@ -36,9 +35,8 @@ import org.nd4j.linalg.learning.RmsPropUpdater; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -394,7 +392,7 @@ public void checkSerialization() throws Exception { // i.e., make sure state is properly stored MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(2).seed(12345) + .iterations(2).seed(12345) .list() .layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY).build()) @@ -421,13 +419,7 @@ public void checkSerialization() throws Exception { assertEquals(out, out2); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ModelSerializer.writeModel(net, baos, true); - baos.close(); - byte[] bArr = baos.toByteArray(); - - ByteArrayInputStream bais = new ByteArrayInputStream(bArr); - MultiLayerNetwork net2 = ModelSerializer.restoreMultiLayerNetwork(bais, true); + MultiLayerNetwork net2 = TestUtils.testModelSerialization(net); INDArray outDeser = net2.output(in, false); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java index 1fcab6f70184..b86ae4f42403 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java @@ -1,6 +1,5 @@ package org.deeplearning4j.nn.layers.normalization; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; @@ -23,6 +22,7 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.primitives.Pair; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; @@ 
-120,7 +120,7 @@ public void testRegularization() { // Confirm a structure with regularization true will not throw an error NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() - .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).regularization(true).l1(0.2) + .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).l1(0.2) .l2(0.1).seed(123) .layer(new LocalResponseNormalization.Builder().k(2).n(5).alpha(1e-4).beta(0.75).build()) .build(); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java index ae6d7daac072..809865769c3b 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java @@ -1,31 +1,17 @@ package org.deeplearning4j.nn.layers.objdetect; -import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; -import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.util.ModelSerializer; import org.junit.Test; -import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.executioner.OpExecutioner; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.ops.transforms.Transforms; -import org.nd4j.linalg.util.ArrayUtil; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.util.Arrays; import static org.junit.Assert.*; -import static org.nd4j.linalg.indexing.NDArrayIndex.all; -import static org.nd4j.linalg.indexing.NDArrayIndex.interval; -import static org.nd4j.linalg.indexing.NDArrayIndex.point; +import static org.nd4j.linalg.indexing.NDArrayIndex.*; public class TestYolo2OutputLayer { @@ -100,14 +86,7 @@ public void testYoloActivateScoreBasic() throws Exception { //Finally: test ser/de: - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ModelSerializer.writeModel(net, baos, true); - byte[] bytes = baos.toByteArray(); - ByteArrayInputStream bais = new ByteArrayInputStream(bytes); - MultiLayerNetwork netLoaded = ModelSerializer.restoreMultiLayerNetwork(bais, true); - - assertEquals(net.params(), netLoaded.params()); - assertEquals(net.getLayerWiseConfigurations(), netLoaded.getLayerWiseConfigurations()); + MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net); y2impl = (org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer) netLoaded.getLayer(1); y2impl.setInput(input); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java index b8f25abe3ecd..ac3a07070aed 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java @@ -3,7 +3,6 @@ import 
org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.GravesLSTM; @@ -17,6 +16,7 @@ import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.Random; @@ -41,8 +41,8 @@ public void testMaskingRnn() { for (int miniBatchSize : minibatchSizes) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false) - .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) .build()) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java index 922c2abea808..24c54bc10814 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java @@ -1,14 +1,12 @@ package org.deeplearning4j.nn.layers.recurrent; import junit.framework.TestCase; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.eval.Evaluation; import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -24,7 +22,10 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.AdaGrad; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -257,7 +258,7 @@ public void testSimpleForwardsAndBackwardsActivation() { .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder() .nIn(nIn).nOut(layerSize).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.1, 0.1)) - .activation(Activation.TANH).updater(Updater.NONE).build()) + .activation(Activation.TANH).updater(new NoOp()).build()) .build(); final NeuralNetConfiguration confForwards = new NeuralNetConfiguration.Builder() @@ -463,7 +464,8 @@ public void testConvergence() { final MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(5) - .learningRate(0.1).rmsDecay(0.95).regularization(true).l2(0.001).updater(Updater.ADAGRAD) + .updater(new AdaGrad(0.1)) + .l2(0.001) 
.seed(12345).list().pretrain(false) .layer(0, new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder() .activation(Activation.TANH).nIn(2).nOut(2).weightInit(WeightInit.DISTRIBUTION) @@ -524,7 +526,8 @@ public void testSerialization() { final MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(5) - .learningRate(0.1).rmsDecay(0.95).regularization(true).l2(0.001).updater(Updater.ADAGRAD) + .updater(new AdaGrad(0.1)) + .l2(0.001) .seed(12345).list().pretrain(false) .layer(0, new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder() .activation(Activation.TANH).nIn(2).nOut(2).weightInit(WeightInit.DISTRIBUTION) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java index c11e28390292..26820ae76e48 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java @@ -1,10 +1,8 @@ package org.deeplearning4j.nn.layers.recurrent; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -16,7 +14,9 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.primitives.Pair; import java.lang.reflect.Field; import java.lang.reflect.Method; @@ -177,7 +177,7 @@ public void testSingleExample() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(0.1).seed(12345).list() + .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().activation(Activation.TANH) .nIn(2).nOut(2).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder() diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestReconstructionDistributions.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestReconstructionDistributions.java index 29fbdb77f54b..476f817b8150 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestReconstructionDistributions.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestReconstructionDistributions.java @@ -10,6 +10,7 @@ import org.deeplearning4j.nn.conf.layers.variational.GaussianReconstructionDistribution; import org.deeplearning4j.nn.conf.layers.variational.ReconstructionDistribution; import org.junit.Test; +import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; @@ -46,7 +47,7 @@ public void testGaussianLogProb() { distributionParams.get(NDArrayIndex.all(), 
NDArrayIndex.interval(inputSize, 2 * inputSize)) .assign(logStdevSquared); - ReconstructionDistribution dist = new GaussianReconstructionDistribution("identity"); + ReconstructionDistribution dist = new GaussianReconstructionDistribution(Activation.IDENTITY); double negLogProb = dist.negLogProbability(x, distributionParams, average); @@ -115,7 +116,7 @@ public void testBernoulliLogProb() { INDArray distributionParams = Nd4j.rand(minibatch, inputSize).muli(2).subi(1); //i.e., pre-sigmoid prob INDArray prob = Transforms.sigmoid(distributionParams, true); - ReconstructionDistribution dist = new BernoulliReconstructionDistribution("sigmoid"); + ReconstructionDistribution dist = new BernoulliReconstructionDistribution(Activation.SIGMOID); double negLogProb = dist.negLogProbability(x, distributionParams, average); @@ -192,7 +193,7 @@ public void testExponentialLogProb() { INDArray distributionParams = Nd4j.rand(minibatch, inputSize).muli(2).subi(1); //i.e., pre-afn gamma INDArray gammas = Transforms.tanh(distributionParams, true); - ReconstructionDistribution dist = new ExponentialReconstructionDistribution("tanh"); + ReconstructionDistribution dist = new ExponentialReconstructionDistribution(Activation.TANH); double negLogProb = dist.negLogProbability(x, distributionParams, average); @@ -263,11 +264,11 @@ public void gradientCheckReconstructionDistributions() { Random r = new Random(12345); ReconstructionDistribution[] distributions = - new ReconstructionDistribution[] {new GaussianReconstructionDistribution("identity"), - new GaussianReconstructionDistribution("tanh"), - new BernoulliReconstructionDistribution("sigmoid"), - new ExponentialReconstructionDistribution("identity"), - new ExponentialReconstructionDistribution("tanh")}; + new ReconstructionDistribution[] {new GaussianReconstructionDistribution(Activation.IDENTITY), + new GaussianReconstructionDistribution(Activation.TANH), + new BernoulliReconstructionDistribution(Activation.SIGMOID), + new ExponentialReconstructionDistribution(Activation.IDENTITY), + new ExponentialReconstructionDistribution(Activation.TANH)}; List passes = new ArrayList<>(); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java index d500a3dcf80f..b3ce278b9147 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java @@ -1,9 +1,7 @@ package org.deeplearning4j.nn.layers.variational; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.conf.layers.variational.*; @@ -18,6 +16,7 @@ import org.nd4j.linalg.api.ops.random.impl.BernoulliDistribution; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.impl.LossMAE; import org.nd4j.linalg.lossfunctions.impl.LossMSE; @@ -253,26 +252,25 @@ public void testJsonYaml() { MultiLayerConfiguration config = new NeuralNetConfiguration.Builder().seed(12345).list() .layer(0, new 
org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() - .reconstructionDistribution(new GaussianReconstructionDistribution("identity")) + .reconstructionDistribution(new GaussianReconstructionDistribution(Activation.IDENTITY)) .nIn(3).nOut(4).encoderLayerSizes(5).decoderLayerSizes(6).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() - .reconstructionDistribution(new GaussianReconstructionDistribution("tanh")) + .reconstructionDistribution(new GaussianReconstructionDistribution(Activation.TANH)) .nIn(7).nOut(8).encoderLayerSizes(9).decoderLayerSizes(10).build()) .layer(2, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() .reconstructionDistribution(new BernoulliReconstructionDistribution()).nIn(11) .nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build()) .layer(3, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() - .reconstructionDistribution(new ExponentialReconstructionDistribution("tanh")) + .reconstructionDistribution(new ExponentialReconstructionDistribution(Activation.TANH)) .nIn(11).nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build()) .layer(4, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() - //.lossFunction("tanh", LossFunctions.LossFunction.MSE) .lossFunction(new ActivationTanH(), LossFunctions.LossFunction.MSE).nIn(11) .nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build()) .layer(5, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() .reconstructionDistribution(new CompositeReconstructionDistribution.Builder() .addDistribution(5, new GaussianReconstructionDistribution()) .addDistribution(5, - new GaussianReconstructionDistribution("tanh")) + new GaussianReconstructionDistribution(Activation.TANH)) .addDistribution(5, new BernoulliReconstructionDistribution()) .build()) .nIn(15).nOut(16).encoderLayerSizes(17).decoderLayerSizes(18).build()) @@ -334,8 +332,8 @@ public void testReconstructionDistributionsSimple() { throw new RuntimeException(); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.3) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3) + .updater(new Sgd(1.0)) .seed(12345L).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) .list().layer(0, new VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3) @@ -343,7 +341,7 @@ public void testReconstructionDistributionsSimple() { .pzxActivationFunction(Activation.TANH) .reconstructionDistribution( reconstructionDistributions[i]) - .activation(new ActivationTanH()).updater(Updater.SGD) + .activation(new ActivationTanH()) .build()) .pretrain(true).backprop(false).build(); @@ -400,8 +398,8 @@ public void testReconstructionErrorSimple() { for (int i = 0; i < reconstructionDistributions.length; i++) { INDArray data = Nd4j.rand(minibatch, inOutSize).muli(2).subi(1); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(0.2).l1(0.3) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3) + .updater(new Sgd(1.0)) .seed(12345L).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) .list().layer(0, new 
VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3) @@ -409,7 +407,7 @@ public void testReconstructionErrorSimple() { .pzxActivationFunction(Activation.TANH) .reconstructionDistribution( reconstructionDistributions[i]) - .activation(new ActivationTanH()).updater(Updater.SGD) + .activation(new ActivationTanH()) .build()) .pretrain(true).backprop(false).build(); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java index 635a70e94caa..bb6fdcc5f700 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java @@ -1,6 +1,5 @@ package org.deeplearning4j.nn.misc; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; @@ -19,6 +18,7 @@ import org.nd4j.linalg.api.buffer.DataBuffer; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.util.ArrayList; import java.util.List; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java index cf3c664753af..f7e9860176cb 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java @@ -2,10 +2,8 @@ import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.gradient.Gradient; @@ -19,6 +17,7 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; import java.util.Arrays; @@ -299,19 +298,18 @@ private static void testIrisMiniBatchGradients(int miniBatchSize, int[] hiddenLa */ private static MultiLayerConfiguration getIrisMLPSimpleConfig(int[] hiddenLayerSizes, Activation activationFunction) { - NeuralNetConfiguration.ListBuilder lb = new NeuralNetConfiguration.Builder().iterations(1).learningRate(0.1) - .updater(Updater.SGD).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .regularization(false).seed(12345L).list(); + NeuralNetConfiguration.ListBuilder lb = new NeuralNetConfiguration.Builder().iterations(1).updater(new Sgd(0.1)) + .seed(12345L).list(); for (int i = 0; i < hiddenLayerSizes.length; i++) { int nIn = (i == 0 ? 
4 : hiddenLayerSizes[i - 1]); lb.layer(i, new DenseLayer.Builder().nIn(nIn).nOut(hiddenLayerSizes[i]).weightInit(WeightInit.XAVIER) - .updater(Updater.SGD).activation(activationFunction).build()); + .activation(activationFunction).build()); } lb.layer(hiddenLayerSizes.length, new OutputLayer.Builder(LossFunction.MCXENT).nIn(hiddenLayerSizes[hiddenLayerSizes.length - 1]) - .nOut(3).weightInit(WeightInit.XAVIER).updater(Updater.SGD) + .nOut(3).weightInit(WeightInit.XAVIER) .activation(activationFunction.equals(Activation.IDENTITY) ? Activation.IDENTITY : Activation.SOFTMAX) .build()); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/GravesLSTMOutputTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/GravesLSTMOutputTest.java index b1fca78bffc0..8ecb6a95aa20 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/GravesLSTMOutputTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/GravesLSTMOutputTest.java @@ -1,11 +1,9 @@ package org.deeplearning4j.nn.multilayer; import org.deeplearning4j.eval.Evaluation; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.BackpropType; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.GravesLSTM; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -23,6 +21,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.AdaGrad; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.util.FeatureUtil; import org.slf4j.Logger; @@ -90,18 +89,14 @@ private Evaluation eval(MultiLayerNetwork network) { private MultiLayerConfiguration getNetworkConf(int iterations, boolean useTBPTT) { MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .learningRate(0.1).regularization(true).l2(0.0025) - .iterations(iterations).stepFunction( - new NegativeDefaultStepFunction()) + .updater(new AdaGrad(0.1)).l2(0.0025) + .iterations(iterations).stepFunction(new NegativeDefaultStepFunction()) .list() .layer(0, new GravesLSTM.Builder().weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0.0, 0.01)).nIn(nIn) - .nOut(layerSize).updater(Updater.ADAGRAD) - .activation(Activation.TANH).build()) + .nOut(layerSize).activation(Activation.TANH).build()) .layer(1, new OutputLayer.Builder( - LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .updater(Updater.ADAGRAD).nIn(layerSize) + LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nIn(layerSize) .nOut(nIn).activation(Activation.SOFTMAX) .build()) .inputPreProcessor(1, new RnnToFeedForwardPreProcessor()).backprop(true) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java index 8945fc80f632..52ee20eac276 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java @@ -18,7 +18,7 @@ package org.deeplearning4j.nn.multilayer; -import org.nd4j.linalg.primitives.Pair; +import 
org.deeplearning4j.TestUtils; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.eval.Evaluation; @@ -57,14 +57,16 @@ import org.nd4j.linalg.heartbeat.reports.Task; import org.nd4j.linalg.heartbeat.utils.EnvironmentUtils; import org.nd4j.linalg.heartbeat.utils.TaskUtils; +import org.nd4j.linalg.learning.config.NoOp; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.ObjectOutputStream; import java.util.*; import static org.junit.Assert.*; @@ -177,8 +179,8 @@ public void testDbn() throws Exception { Nd4j.MAX_SLICES_TO_PRINT = -1; Nd4j.MAX_ELEMENTS_PER_SLICE = -1; MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().iterations(100).momentum(0.9) - .optimizationAlgo(OptimizationAlgorithm.LBFGS).regularization(true).l2(2e-4) + new NeuralNetConfiguration.Builder().iterations(100) + .optimizationAlgo(OptimizationAlgorithm.LBFGS).l2(2e-4) .list().layer(0, new RBM.Builder(RBM.HiddenUnit.GAUSSIAN, RBM.VisibleUnit.GAUSSIAN).nIn(4).nOut(3) @@ -363,7 +365,7 @@ public void testFeedForwardToLayer() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) - .iterations(5).learningRate(1e-3) + .updater(new Sgd(1e-3)).iterations(5) .list().layer( 0, new RBM.Builder(RBM.HiddenUnit.RECTIFIED, RBM.VisibleUnit.GAUSSIAN).nIn(nIn) @@ -423,7 +425,7 @@ public void testBackpropGradient() { int miniBatch = 5; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .updater(org.deeplearning4j.nn.conf.Updater.SGD).learningRate(0.1).list() + .updater(new Sgd(0.1)).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).activation(Activation.RELU) .weightInit(WeightInit.XAVIER).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).activation(Activation.RELU) @@ -485,7 +487,7 @@ public void testLayerNames() { layerNameList.add("dnn3"); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .updater(org.deeplearning4j.nn.conf.Updater.SGD).learningRate(0.1).list() + .updater(new Sgd(0.1)).list() .layer(0, new DenseLayer.Builder().name("dnn1").nIn(nIn).nOut(20).activation(Activation.RELU) .weightInit(WeightInit.XAVIER).build()) .layer(1, new DenseLayer.Builder().name("dnn2").nIn(20).nOut(30).activation(Activation.RELU) @@ -505,7 +507,7 @@ public void testLayerNames() { @Test public void testTranspose() { MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().iterations(100).momentum(0.9).regularization(true).l2(2e-4) + new NeuralNetConfiguration.Builder().iterations(100).l2(2e-4) .list().layer(0, new RBM.Builder(RBM.HiddenUnit.GAUSSIAN, RBM.VisibleUnit.GAUSSIAN).nIn(4).nOut(3) @@ -557,15 +559,15 @@ public void testScoreExamples() { Nd4j.getRandom().setSeed(12345); int nIn = 5; int nOut = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l1(0.01) - .l2(0.01).learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01) + .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() .layer(0, new 
DenseLayer.Builder().nIn(nIn).nOut(20).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) .build(); - MultiLayerConfiguration confNoReg = new NeuralNetConfiguration.Builder().seed(12345).regularization(false) - .learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() + MultiLayerConfiguration confNoReg = new NeuralNetConfiguration.Builder().seed(12345) + .updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) @@ -609,7 +611,7 @@ public void testScoreExamples() { public void testDataSetScore() { Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .weightInit(WeightInit.XAVIER).seed(12345L).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.SIGMOID).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -634,7 +636,7 @@ public void testDataSetScoreCNN() { int height = 3; int nOut = 2; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(12345L).list().layer(0, new ConvolutionLayer.Builder(2, 2).nOut(1).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(2).build()) @@ -659,7 +661,7 @@ public void testDataSetScoreCNN() { public void testPredict() throws Exception { Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .weightInit(WeightInit.XAVIER).seed(12345L).list() .layer(0, new DenseLayer.Builder().nIn(784).nOut(50).activation(Activation.RELU).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -699,7 +701,7 @@ public void testCid() throws Exception { @Test public void testOutput() throws Exception { Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .weightInit(WeightInit.XAVIER).seed(12345L).list() .layer(0, new DenseLayer.Builder().nIn(784).nOut(50).activation(Activation.RELU).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -735,8 +737,8 @@ public void testGradientUpdate() throws Exception { expectedGradient.setGradientFor("1_b", Nd4j.ones(1, 3)); MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().updater(org.deeplearning4j.nn.conf.Updater.SGD) - .learningRate(1).activation(Activation.RELU).weightInit(WeightInit.XAVIER) + new NeuralNetConfiguration.Builder().updater(new Sgd(1.0)) + .activation(Activation.RELU).weightInit(WeightInit.XAVIER) .list().layer(0, new DenseLayer.Builder().name("dnn1").nIn(4).nOut(5).build()) .layer(1, new OutputLayer.Builder().name("output").nIn(5).nOut(3) .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER) @@ -843,8 +845,7 @@ public void 
testLayerPreTrainSetFalseAfterPreTrain() { public MultiLayerNetwork getRBMModel(boolean preTrain, int nIn, int nOut) { MultiLayerConfiguration rbm = new NeuralNetConfiguration.Builder() - .seed(42).iterations(1).updater(Updater.NONE).epsilon( - 1) + .seed(42).iterations(1).updater(new NoOp()) .weightInit(WeightInit.UNIFORM) .list(new org.deeplearning4j.nn.conf.layers.RBM.Builder() .lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY) @@ -914,7 +915,7 @@ public void testBiasL1L2() { .backprop(true).pretrain(false).build(); MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).regularization(true) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .l1Bias(0.1).l2Bias(0.2).iterations(1).weightInit(WeightInit.XAVIER).activation(Activation.TANH) .seed(123).list().layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( @@ -1000,9 +1001,9 @@ public void testSummary() { int V_HEIGHT = 130; int V_NFRAMES = 150; MultiLayerConfiguration confForArchitecture = - new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l2(0.001) //l2 regularization on all layers + new NeuralNetConfiguration.Builder().seed(12345).l2(0.001) //l2 regularization on all layers .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .iterations(1).learningRate(0.4).list() + .iterations(1).list() .layer(0, new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB .nOut(30).stride(4, 4).activation(Activation.RELU).weightInit( WeightInit.RELU) @@ -1016,12 +1017,12 @@ public void testSummary() { .weightInit(WeightInit.RELU).updater(Updater.ADAGRAD) .gradientNormalization( GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(10).learningRate(0.5).build()) + .gradientNormalizationThreshold(10).build()) .layer(4, new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50) .nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD) .gradientNormalization( GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(10).learningRate(0.6) + .gradientNormalizationThreshold(10) .build()) .layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(50).nOut(4) //4 possible shapes: circle, square, arc, line @@ -1182,14 +1183,7 @@ public void testEpochCounter() throws Exception { assertEquals(4, net.getLayerWiseConfigurations().getEpochCount()); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - - ModelSerializer.writeModel(net, baos, true); - byte[] bytes = baos.toByteArray(); - - ByteArrayInputStream bais = new ByteArrayInputStream(bytes); - - MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); + MultiLayerNetwork restored = TestUtils.testModelSerialization(net); assertEquals(4, restored.getLayerWiseConfigurations().getEpochCount()); } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java index b29b2ef75547..88d474243da3 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java @@ -1,6 +1,5 @@ package org.deeplearning4j.nn.multilayer; -import org.nd4j.linalg.primitives.Pair; import 
org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.BackpropType; @@ -26,6 +25,7 @@ import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; +import org.nd4j.linalg.primitives.Pair; import java.util.ArrayList; import java.util.List; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java index e77817e9fb5e..f2527df6a420 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java @@ -4,7 +4,10 @@ import org.deeplearning4j.eval.EvaluationBinary; import org.deeplearning4j.gradientcheck.LossFunctionGradientCheck; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.*; +import org.deeplearning4j.nn.conf.BackpropType; +import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -20,6 +23,7 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.impl.*; @@ -124,7 +128,7 @@ public void testPerOutputMaskingMLN() { Activation a = act[i]; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345) .list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) @@ -169,7 +173,7 @@ public void testPerOutputMaskingMLN() { //Do the same for CompGraph - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().updater(Updater.NONE) + ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().updater(new NoOp()) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345) .graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(layerSize) @@ -209,7 +213,7 @@ public void testCompGraphEvalWithMask() { int nIn = 5; int nOut = 4; - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().updater(Updater.NONE) + ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().updater(new NoOp()) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345) .graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java index 39d6cd60ee2d..b32ca5f88009 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java +++ 
b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java @@ -3,7 +3,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; @@ -17,6 +16,8 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.NoOp; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.Arrays; @@ -45,7 +46,7 @@ public void testVariableLengthSimple() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(0.1).seed(12345).list() + .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build()) .layer(1, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2) .nOut(1).build()) @@ -133,7 +134,7 @@ public void testInputMasking() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(0.1).seed(12345).list() + .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build()) .layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build()) @@ -270,17 +271,17 @@ public void testOutputMaskingScoreMagnitudes() { INDArray labels = Nd4j.ones(miniBatch, nOut, tsLength); MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().regularization(false).seed(12345L).list() + new NeuralNetConfiguration.Builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5) .weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .layer(1, new RnnOutputLayer.Builder( LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(5).nOut(nOut) .weightInit(WeightInit.ZERO) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .pretrain(false).backprop(true).build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); @@ -333,33 +334,33 @@ public void testOutputMasking() { INDArray input = Nd4j.rand(new int[] {miniBatch, nIn, tsLength}); MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().regularization(false).seed(12345L).list() + new NeuralNetConfiguration.Builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5) .weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .layer(1, new RnnOutputLayer.Builder( LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(5).nOut(nOut) .weightInit(WeightInit.XAVIER) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .pretrain(false).backprop(true).build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); 
MultiLayerConfiguration conf2 = - new NeuralNetConfiguration.Builder().regularization(false).seed(12345L).list() + new NeuralNetConfiguration.Builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5) .weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .layer(1, new RnnOutputLayer.Builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .nIn(5).nOut(nOut) .weightInit(WeightInit.XAVIER) - .updater(Updater.NONE).build()) + .updater(new NoOp()).build()) .pretrain(false).backprop(true).build(); MultiLayerNetwork mln2 = new MultiLayerNetwork(conf2); mln2.init(); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java index a9f7a1e9a42f..9a411b799e61 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java @@ -3,7 +3,6 @@ import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.gradient.Gradient; @@ -14,6 +13,10 @@ import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Adam; +import org.nd4j.linalg.learning.config.IUpdater; +import org.nd4j.linalg.learning.config.Nesterovs; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import static org.junit.Assert.assertEquals; @@ -34,13 +37,12 @@ public void testGradientApplyMultiLayerNetwork() { int nOut = 10; for (boolean regularization : new boolean[] {false, true}) { - for (Updater u : new Updater[] {Updater.SGD, Updater.NESTEROVS, Updater.ADAM}) { - // for (Updater u : new Updater[]{Updater.ADAM}) { + for (IUpdater u : new IUpdater[] {new Sgd(0.1), new Nesterovs(0.1), new Adam(0.1)}) { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).activation(Activation.TANH) - .weightInit(WeightInit.XAVIER).updater(u).learningRate(0.1) - .regularization(regularization).l1(regularization ? 0.2 : 0.0) + .weightInit(WeightInit.XAVIER).updater(u) + .l1(regularization ? 0.2 : 0.0) .l2(regularization ? 
0.3 : 0.0).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(10).build()) .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(2, @@ -82,7 +84,7 @@ public void testGradientApplyMultiLayerNetwork() { Gradient g = net1GradCalc.gradient(); INDArray gBefore = g.gradient().dup(); //Net 1 gradient should be modified INDArray net2GradBefore = net2GradUpd.gradient().gradient().dup(); //But net 2 gradient should not be - net2GradUpd.getUpdater().update(net2GradUpd, g, 0, minibatch); + net2GradUpd.getUpdater().update(net2GradUpd, g, 0, 0, minibatch); INDArray gAfter = g.gradient().dup(); INDArray net2GradAfter = net2GradUpd.gradient().gradient().dup(); @@ -99,7 +101,7 @@ public void testGradientApplyMultiLayerNetwork() { //============================= - if (u != Updater.SGD) { + if (!(u instanceof Sgd)) { net2GradUpd.getUpdater().getStateViewArray().assign(net1GradCalc.getUpdater().getStateViewArray()); } assertEquals(net1GradCalc.params(), net2GradUpd.params()); @@ -127,12 +129,12 @@ public void testGradientApplyComputationGraph() { int nOut = 10; for (boolean regularization : new boolean[] {false, true}) { - for (Updater u : new Updater[] {Updater.SGD, Updater.ADAM}) { + for (IUpdater u : new IUpdater[] {new Sgd(0.1), new Adam(0.1)}) { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).activation(Activation.TANH) - .weightInit(WeightInit.XAVIER).updater(u).learningRate(0.1) - .regularization(regularization).l1(regularization ? 0.2 : 0.0) + .weightInit(WeightInit.XAVIER).updater(u) + .l1(regularization ? 0.2 : 0.0) .l2(regularization ? 0.3 : 0.0).graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(10).build(), "in") .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).build(), "0") @@ -174,7 +176,7 @@ public void testGradientApplyComputationGraph() { Gradient g = net1GradCalc.gradient(); INDArray gBefore = g.gradient().dup(); //Net 1 gradient should be modified INDArray net2GradBefore = net2GradUpd.gradient().gradient().dup(); //But net 2 gradient should not be - net2GradUpd.getUpdater().update(g, 0, minibatch); + net2GradUpd.getUpdater().update(g, 0, 0, minibatch); INDArray gAfter = g.gradient().dup(); INDArray net2GradAfter = net2GradUpd.gradient().gradient().dup(); @@ -190,7 +192,7 @@ public void testGradientApplyComputationGraph() { assertEquals(net1GradCalc.params(), net2GradUpd.params()); //============================= - if (u != Updater.SGD) { + if (!(u instanceof Sgd)) { net2GradUpd.getUpdater().getStateViewArray().assign(net1GradCalc.getUpdater().getStateViewArray()); } assertEquals(net1GradCalc.params(), net2GradUpd.params()); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningJson.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningJson.java index a555336d2a6c..82be21432534 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningJson.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningJson.java @@ -1,8 +1,8 @@ package org.deeplearning4j.nn.transferlearning; -import org.deeplearning4j.nn.conf.Updater; import org.junit.Test; import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.learning.config.AdaGrad; import static org.junit.Assert.assertEquals; @@ -15,7 +15,7 @@ public class TestTransferLearningJson { public void testJsonYaml() { FineTuneConfiguration c = new 
FineTuneConfiguration.Builder().activation(Activation.ELU).backprop(true)
-                        .updater(Updater.ADAGRAD).biasLearningRate(10.0).build();
+                        .updater(new AdaGrad(1.0)).biasUpdater(new AdaGrad(10.0)).build();
         String asJson = c.toJson();
         String asYaml = c.toYaml();
diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java
index d1179b90bdf6..137ab9bf8e68 100644
--- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java
+++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java
@@ -1,9 +1,9 @@
 package org.deeplearning4j.nn.transferlearning;
+import org.deeplearning4j.TestUtils;
 import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
 import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.Updater;
 import org.deeplearning4j.nn.conf.graph.GraphVertex;
 import org.deeplearning4j.nn.conf.graph.LayerVertex;
 import org.deeplearning4j.nn.conf.layers.DenseLayer;
@@ -11,15 +11,13 @@
 import org.deeplearning4j.nn.graph.ComputationGraph;
 import org.deeplearning4j.nn.layers.FrozenLayer;
 import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
-import org.deeplearning4j.util.ModelSerializer;
 import org.junit.Test;
 import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Sgd;
 import org.nd4j.linalg.lossfunctions.LossFunctions;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
 import java.util.Map;
 import static org.junit.Assert.*;
@@ -32,13 +30,13 @@ public class TestTransferLearningModelSerializer {
     @Test
     public void testModelSerializerFrozenLayers() throws Exception {
-        FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().learningRate(0.1).build();
+        FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build();
         int nIn = 6;
         int nOut = 3;
-        MultiLayerConfiguration origConf = new NeuralNetConfiguration.Builder().learningRate(0.1).updater(Updater.SGD)
-                        .activation(Activation.TANH).regularization(true).dropOut(0.5).list()
+        MultiLayerConfiguration origConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1))
+                        .activation(Activation.TANH).dropOut(0.5).list()
                         .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5).build())
                         .layer(1, new DenseLayer.Builder().nIn(5).nOut(4).build())
                         .layer(2, new DenseLayer.Builder().nIn(4).nOut(3).build())
@@ -60,15 +58,7 @@ public void testModelSerializerFrozenLayers() throws Exception {
         assertTrue(withFrozen.getLayerWiseConfigurations().getConf(1)
                         .getLayer() instanceof org.deeplearning4j.nn.conf.layers.misc.FrozenLayer);
-
-        ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        ModelSerializer.writeModel(withFrozen, baos, false);
-        baos.close();
-
-        byte[] asBytes = baos.toByteArray();
-
-        ByteArrayInputStream bais = new ByteArrayInputStream(asBytes);
-        MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais);
+        MultiLayerNetwork restored = TestUtils.testModelSerialization(withFrozen);
         assertTrue(restored.getLayer(0) instanceof FrozenLayer);
         assertTrue(restored.getLayer(1) instanceof FrozenLayer);
@@ -89,13 +79,12 @@ public void 
testModelSerializerFrozenLayers() throws Exception { @Test public void testModelSerializerFrozenLayersCompGraph() throws Exception { - FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().learningRate(0.1).build(); + FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build(); int nIn = 6; int nOut = 3; - ComputationGraphConfiguration origConf = new NeuralNetConfiguration.Builder().learningRate(0.1) - .updater(Updater.SGD).activation(Activation.TANH).graphBuilder().addInputs("in") + ComputationGraphConfiguration origConf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(5).build(), "in") .addLayer("1", new DenseLayer.Builder().nIn(5).nOut(4).build(), "0") .addLayer("2", new DenseLayer.Builder().nIn(4).nOut(3).build(), "1") @@ -119,15 +108,7 @@ public void testModelSerializerFrozenLayersCompGraph() throws Exception { assertTrue(l0 instanceof org.deeplearning4j.nn.conf.layers.misc.FrozenLayer); assertTrue(l1 instanceof org.deeplearning4j.nn.conf.layers.misc.FrozenLayer); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ModelSerializer.writeModel(withFrozen, baos, false); - baos.close(); - - byte[] asBytes = baos.toByteArray(); - - ByteArrayInputStream bais = new ByteArrayInputStream(asBytes); - ComputationGraph restored = ModelSerializer.restoreComputationGraph(bais); + ComputationGraph restored = TestUtils.testModelSerialization(withFrozen); assertTrue(restored.getLayer(0) instanceof FrozenLayer); assertTrue(restored.getLayer(1) instanceof FrozenLayer); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java index 1e613f74c7f1..ba941eadc603 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java @@ -3,7 +3,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; @@ -14,8 +13,14 @@ import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Adam; +import org.nd4j.linalg.learning.config.Nesterovs; +import org.nd4j.linalg.learning.config.RmsProp; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; +import java.util.Collections; + import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; @@ -31,8 +36,8 @@ public void simpleFineTune() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); //original conf ComputationGraphConfiguration confToChange = new NeuralNetConfiguration.Builder().seed(rng) - .optimizationAlgo(OptimizationAlgorithm.LBFGS).updater(Updater.NESTEROVS).momentum(0.99) - .learningRate(0.01).graphBuilder().addInputs("layer0In").setInputTypes(InputType.feedForward(4)) + .optimizationAlgo(OptimizationAlgorithm.LBFGS).updater(new 
Nesterovs(0.01, 0.99)) + .graphBuilder().addInputs("layer0In").setInputTypes(InputType.feedForward(4)) .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "layer0In") .addLayer("layer1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( @@ -44,8 +49,8 @@ public void simpleFineTune() { //conf with learning parameters changed ComputationGraphConfiguration expectedConf = new NeuralNetConfiguration.Builder().seed(rng) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.RMSPROP) - .learningRate(0.2).regularization(true).graphBuilder().addInputs("layer0In") + .updater(new RmsProp(0.2)) + .graphBuilder().addInputs("layer0In") .setInputTypes(InputType.feedForward(4)) .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "layer0In") .addLayer("layer1", @@ -65,10 +70,7 @@ public void simpleFineTune() { ComputationGraph modelNow = new TransferLearning.GraphBuilder(modelToFineTune) .fineTuneConfiguration(new FineTuneConfiguration.Builder().seed(rng) - .optimizationAlgo( - OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(Updater.RMSPROP).learningRate(0.2).regularization(true) - .build()) + .updater(new RmsProp(0.2)).build()) .build(); //Check json @@ -85,11 +87,9 @@ public void simpleFineTune() { public void testNoutChanges() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 2)); - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY); - FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().learningRate(0.1) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) + FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY).build(); ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") @@ -156,11 +156,9 @@ public void testNoutChanges() { public void testRemoveAndAdd() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY); - FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().learningRate(0.1) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) + FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY).build(); ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") @@ -226,139 +224,134 @@ public void testAllWithCNN() { DataSet randomData = new DataSet(Nd4j.rand(10, 28 * 28 * 3).reshape(10, 3, 28, 28), Nd4j.rand(10, 10)); ComputationGraph modelToFineTune = new ComputationGraph( - new NeuralNetConfiguration.Builder().seed(123).iterations(1).learningRate(.01) - .weightInit(WeightInit.XAVIER) - .optimizationAlgo( - OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - 
.updater(Updater.NESTEROVS).momentum(0.9).graphBuilder() - .addInputs("layer0In") - .setInputTypes(InputType.convolutionalFlat(28, 28, - 3)) - .addLayer("layer0", - new ConvolutionLayer.Builder(5, 5).nIn(3) - .stride(1, 1).nOut(20) - .activation(Activation.IDENTITY) - .build(), - "layer0In") - .addLayer("layer1", - new SubsamplingLayer.Builder( - SubsamplingLayer.PoolingType.MAX) - .kernelSize(2, 2) - .stride(2, 2) - .build(), - "layer0") - .addLayer("layer2", - new ConvolutionLayer.Builder(5, 5).stride(1, 1) - .nOut(50) - .activation(Activation.IDENTITY) - .build(), - "layer1") - .addLayer("layer3", - new SubsamplingLayer.Builder( - SubsamplingLayer.PoolingType.MAX) - .kernelSize(2, 2) - .stride(2, 2) - .build(), - "layer2") - .addLayer("layer4", - new DenseLayer.Builder() - .activation(Activation.RELU) - .nOut(500).build(), - "layer3") - .addLayer("layer5", - new DenseLayer.Builder() - .activation(Activation.RELU) - .nOut(250).build(), - "layer4") - .addLayer("layer6", - new OutputLayer.Builder( - LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nOut(100) - .activation(Activation.SOFTMAX) - .build(), - "layer5") - .setOutputs("layer5").backprop(true).pretrain(false).build()); + new NeuralNetConfiguration.Builder().seed(123).iterations(1) + .weightInit(WeightInit.XAVIER) + .updater(new Nesterovs(0.01, 0.9)).graphBuilder() + .addInputs("layer0In") + .setInputTypes(InputType.convolutionalFlat(28, 28, + 3)) + .addLayer("layer0", + new ConvolutionLayer.Builder(5, 5).nIn(3) + .stride(1, 1).nOut(20) + .activation(Activation.IDENTITY) + .build(), + "layer0In") + .addLayer("layer1", + new SubsamplingLayer.Builder( + SubsamplingLayer.PoolingType.MAX) + .kernelSize(2, 2) + .stride(2, 2) + .build(), + "layer0") + .addLayer("layer2", + new ConvolutionLayer.Builder(5, 5).stride(1, 1) + .nOut(50) + .activation(Activation.IDENTITY) + .build(), + "layer1") + .addLayer("layer3", + new SubsamplingLayer.Builder( + SubsamplingLayer.PoolingType.MAX) + .kernelSize(2, 2) + .stride(2, 2) + .build(), + "layer2") + .addLayer("layer4", + new DenseLayer.Builder() + .activation(Activation.RELU) + .nOut(500).build(), + "layer3") + .addLayer("layer5", + new DenseLayer.Builder() + .activation(Activation.RELU) + .nOut(250).build(), + "layer4") + .addLayer("layer6", + new OutputLayer.Builder( + LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .nOut(100) + .activation(Activation.SOFTMAX) + .build(), + "layer5") + .setOutputs("layer5").backprop(true).pretrain(false).build()); modelToFineTune.init(); //this will override the learning configuration set in the model - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().seed(456).learningRate(0.001) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD); - FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().seed(456).learningRate(0.001) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().seed(456).updater(new Sgd(0.001)); + FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().seed(456).updater(new Sgd(0.001)) .build(); ComputationGraph modelNow = - new TransferLearning.GraphBuilder(modelToFineTune).fineTuneConfiguration(fineTuneConfiguration) - .setFeatureExtractor("layer1").nOutReplace("layer4", 600, WeightInit.XAVIER) - .removeVertexAndConnections("layer5").removeVertexAndConnections("layer6") - 
.setInputs("layer0In").setInputTypes(InputType.convolutionalFlat(28, 28, 3)) - .addLayer("layer5", - new DenseLayer.Builder().activation(Activation.RELU).nIn(600) - .nOut(300).build(), - "layer4") - .addLayer("layer6", - new DenseLayer.Builder().activation(Activation.RELU).nIn(300) - .nOut(150).build(), - "layer5") - .addLayer("layer7", - new DenseLayer.Builder().activation(Activation.RELU).nIn(150) - .nOut(50).build(), - "layer6") - .addLayer("layer8", - new OutputLayer.Builder( - LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .activation(Activation.SOFTMAX) - .nIn(50).nOut(10).build(), - "layer7") - .setOutputs("layer8").build(); + new TransferLearning.GraphBuilder(modelToFineTune).fineTuneConfiguration(fineTuneConfiguration) + .setFeatureExtractor("layer1").nOutReplace("layer4", 600, WeightInit.XAVIER) + .removeVertexAndConnections("layer5").removeVertexAndConnections("layer6") + .setInputs("layer0In").setInputTypes(InputType.convolutionalFlat(28, 28, 3)) + .addLayer("layer5", + new DenseLayer.Builder().activation(Activation.RELU).nIn(600) + .nOut(300).build(), + "layer4") + .addLayer("layer6", + new DenseLayer.Builder().activation(Activation.RELU).nIn(300) + .nOut(150).build(), + "layer5") + .addLayer("layer7", + new DenseLayer.Builder().activation(Activation.RELU).nIn(150) + .nOut(50).build(), + "layer6") + .addLayer("layer8", + new OutputLayer.Builder( + LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .activation(Activation.SOFTMAX) + .nIn(50).nOut(10).build(), + "layer7") + .setOutputs("layer8").build(); ComputationGraph modelExpectedArch = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") - .setInputTypes(InputType.convolutionalFlat(28, - 28, 3)) - .addLayer("layer0", - new FrozenLayer(new ConvolutionLayer.Builder(5, 5).nIn(3) - .stride(1, 1).nOut(20) - .activation(Activation.IDENTITY).build()), - "layer0In") - .addLayer("layer1", - new FrozenLayer(new SubsamplingLayer.Builder( - SubsamplingLayer.PoolingType.MAX) - .kernelSize(2, 2).stride(2, 2) - .build()), - "layer0") - .addLayer("layer2", - new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50) - .activation(Activation.IDENTITY).build(), - "layer1") - .addLayer("layer3", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) - .kernelSize(2, 2).stride(2, 2).build(), - "layer2") - .addLayer("layer4", - new DenseLayer.Builder().activation(Activation.RELU).nOut(600) - .build(), - "layer3") - .addLayer("layer5", - new DenseLayer.Builder().activation(Activation.RELU).nOut(300) - .build(), - "layer4") - .addLayer("layer6", - new DenseLayer.Builder().activation(Activation.RELU).nOut(150) - .build(), - "layer5") - .addLayer("layer7", - new DenseLayer.Builder().activation(Activation.RELU).nOut(50) - .build(), - "layer6") - .addLayer("layer8", - new OutputLayer.Builder( - LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nOut(10) - .activation(Activation.SOFTMAX) - .build(), - "layer7") - .setOutputs("layer8").backprop(true).pretrain(false).build()); + .setInputTypes(InputType.convolutionalFlat(28,28, 3)) + .addLayer("layer0", + new FrozenLayer(new ConvolutionLayer.Builder(5, 5).nIn(3) + .stride(1, 1).nOut(20) + .activation(Activation.IDENTITY).build()), + "layer0In") + .addLayer("layer1", + new FrozenLayer(new SubsamplingLayer.Builder( + SubsamplingLayer.PoolingType.MAX) + .kernelSize(2, 2).stride(2, 2) + .build()), + "layer0") + .addLayer("layer2", + new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50) + .activation(Activation.IDENTITY).build(), + "layer1") + .addLayer("layer3", + new 
SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + .kernelSize(2, 2).stride(2, 2).build(), + "layer2") + .addLayer("layer4", + new DenseLayer.Builder().activation(Activation.RELU).nOut(600) + .build(), + "layer3") + .addLayer("layer5", + new DenseLayer.Builder().activation(Activation.RELU).nOut(300) + .build(), + "layer4") + .addLayer("layer6", + new DenseLayer.Builder().activation(Activation.RELU).nOut(150) + .build(), + "layer5") + .addLayer("layer7", + new DenseLayer.Builder().activation(Activation.RELU).nOut(50) + .build(), + "layer6") + .addLayer("layer8", + new OutputLayer.Builder( + LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .nOut(10) + .activation(Activation.SOFTMAX) + .build(), + "layer7") + .setOutputs("layer8").backprop(true).pretrain(false).build()); modelExpectedArch.init(); modelExpectedArch.getVertex("layer0").setLayerAsFrozen(); modelExpectedArch.getVertex("layer1").setLayerAsFrozen(); @@ -380,11 +373,10 @@ public void testAllWithCNN() { @Test public void testTransferGlobalPool() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.ADAM) - .adamMeanDecay(0.9).adamVarDecay(0.999).weightInit(WeightInit.XAVIER).learningRate(0.1) + ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new Adam(0.1)) + .weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in") - .addLayer("blstm1", - new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10) + .addLayer("blstm1",new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10) .activation(Activation.TANH).build(), "in") .addLayer("pool", new GlobalPoolingLayer.Builder().build(), "blstm1") @@ -397,19 +389,20 @@ public void testTransferGlobalPool() { g.init(); FineTuneConfiguration fineTuneConfiguration = - new FineTuneConfiguration.Builder().seed(12345).learningRate(0.01).build(); + new FineTuneConfiguration.Builder().seed(12345).updater(new Sgd(0.01)).build(); ComputationGraph graph = new TransferLearning.GraphBuilder(g).fineTuneConfiguration(fineTuneConfiguration) .removeVertexKeepConnections("out").setFeatureExtractor("dense") - .addLayer("out", new OutputLayer.Builder().updater(Updater.ADAM).adamMeanDecay(0.9) - .adamVarDecay(0.999).weightInit(WeightInit.XAVIER) + .addLayer("out", new OutputLayer.Builder().updater(new Adam(0.1)) + .weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(10).nOut(5).build(), "dense") .build(); ComputationGraphConfiguration confExpected = new NeuralNetConfiguration.Builder().seed(12345) - .updater(Updater.ADAM).adamMeanDecay(0.9).adamVarDecay(0.999).weightInit(WeightInit.XAVIER) - .learningRate(0.01).graphBuilder().addInputs("in") + .updater(new Sgd(0.01)) + .weightInit(WeightInit.XAVIER) + .graphBuilder().addInputs("in") .addLayer("blstm1", new FrozenLayer(new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10) .activation(Activation.TANH).build()), @@ -417,12 +410,15 @@ public void testTransferGlobalPool() { .addLayer("pool", new FrozenLayer(new GlobalPoolingLayer.Builder().build()), "blstm1") .addLayer("dense", new FrozenLayer(new DenseLayer.Builder().nIn(10).nOut(10).build()), "pool") .addLayer("out", new OutputLayer.Builder().nIn(10).nOut(5).activation(Activation.SOFTMAX) + .updater(new Adam(0.1)) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "dense") .setOutputs("out").build(); ComputationGraph modelExpected = new ComputationGraph(confExpected); modelExpected.init(); - assertEquals(confExpected, 
graph.getConfiguration()); + +// assertEquals(confExpected, graph.getConfiguration()); + assertEquals(confExpected.toJson(), graph.getConfiguration().toJson()); } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java index ccac20a7ddb1..809855503caf 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java @@ -5,7 +5,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.graph.MergeVertex; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.BaseLayer; @@ -18,6 +17,8 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.MultiDataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Adam; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import static org.junit.Assert.*; @@ -36,8 +37,8 @@ public void testMergeAndFreeze() { // (b) Test global override (should be selective) - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.ADAM) - .learningRate(1e-4).activation(Activation.LEAKYRELU).graphBuilder().addInputs("in1", "in2") + ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Adam(1e-4)) + .activation(Activation.LEAKYRELU).graphBuilder().addInputs("in1", "in2") .addLayer("A", new DenseLayer.Builder().nIn(10).nOut(9).build(), "in1") .addLayer("B", new DenseLayer.Builder().nIn(9).nOut(8).build(), "A") .addLayer("C", new DenseLayer.Builder().nIn(7).nOut(6).build(), "in2") @@ -59,8 +60,7 @@ public void testMergeAndFreeze() { ComputationGraph graph2 = new TransferLearning.GraphBuilder(graph) - .fineTuneConfiguration( - new FineTuneConfiguration.Builder().learningRate(2e-2).build()) + .fineTuneConfiguration(new FineTuneConfiguration.Builder().updater(new Adam(2e-2)).build()) .setFeatureExtractor("C").build(); boolean cFound = false; @@ -79,8 +79,7 @@ public void testMergeAndFreeze() { //Also check config: BaseLayer bl = ((BaseLayer) l.conf().getLayer()); - assertEquals(Updater.ADAM, bl.getUpdater()); - assertEquals(2e-2, bl.getLearningRate(), 1e-5); + assertEquals(new Adam(2e-2), bl.getIUpdater()); assertEquals(Activation.LEAKYRELU.getActivationFunction(), bl.getActivationFn()); } assertTrue(cFound); @@ -90,9 +89,9 @@ public void testMergeAndFreeze() { @Test public void testSimplerMergeBackProp() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.9) + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.9)) .activation(Activation.IDENTITY) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD); + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); /* inCentre inRight @@ -172,9 +171,8 @@ public void testSimplerMergeBackProp() { @Test public void testLessSimpleMergeBackProp() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.9) - .activation(Activation.IDENTITY) 
- .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD); + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.9)) + .activation(Activation.IDENTITY); /* inCentre inRight @@ -239,9 +237,8 @@ public void testLessSimpleMergeBackProp() { @Test public void testAddOutput() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.9) - .activation(Activation.IDENTITY) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD); + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.9)) + .activation(Activation.IDENTITY); ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight") .addLayer("denseCentre0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "inCentre") diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java index 02f20d466386..50dadb544bbe 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java @@ -4,7 +4,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.graph.MergeVertex; import org.deeplearning4j.nn.conf.graph.SubsetVertex; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -17,6 +16,7 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.MultiDataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.List; @@ -32,9 +32,9 @@ public class TransferLearningHelperTest { @Test public void tesUnfrozenSubset() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1).seed(124) + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().seed(124) .activation(Activation.IDENTITY) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD); + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)); /* (inCentre) (inRight) | | @@ -114,9 +114,9 @@ public void tesUnfrozenSubset() { @Test public void testFitUnFrozen() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.9).seed(124) + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.9)).seed(124) .activation(Activation.IDENTITY) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD); + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight") .addLayer("denseCentre0", new DenseLayer.Builder().nIn(10).nOut(9).build(), "inCentre") @@ -187,8 +187,8 @@ public void testFitUnFrozen() { public void testMLN() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); - NeuralNetConfiguration.Builder overallConf = new 
NeuralNetConfiguration.Builder().learningRate(0.1) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .activation(Activation.IDENTITY); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(overallConf.clone().list() diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java index 4bc7842d7486..e574b3658cb7 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java @@ -2,7 +2,10 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.*; +import org.deeplearning4j.nn.conf.BackpropType; +import org.deeplearning4j.nn.conf.GradientNormalization; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; @@ -16,6 +19,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.*; import org.nd4j.linalg.lossfunctions.LossFunctions; import static org.junit.Assert.*; @@ -34,7 +38,7 @@ public void simpleFineTune() { //original conf NeuralNetConfiguration.Builder confToChange = new NeuralNetConfiguration.Builder().seed(rng).optimizationAlgo(OptimizationAlgorithm.LBFGS) - .updater(Updater.NESTEROVS).momentum(0.99).learningRate(0.01); + .updater(new Nesterovs(0.01, 0.99)); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(confToChange.list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) @@ -46,24 +50,22 @@ public void simpleFineTune() { //model after applying changes with transfer learning MultiLayerNetwork modelNow = - new TransferLearning.Builder(modelToFineTune) - .fineTuneConfiguration(new FineTuneConfiguration.Builder().seed(rng) - .optimizationAlgo( - OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(Updater.RMSPROP).learningRate(0.5) //Intent: override both weight and bias LR, unless bias LR is manually set also - .l2(0.4).regularization(true).build()) - .build(); + new TransferLearning.Builder(modelToFineTune) + .fineTuneConfiguration(new FineTuneConfiguration.Builder().seed(rng) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .updater(new RmsProp(0.5)) //Intent: override both weight and bias LR, unless bias LR is manually set also + .l2(0.4).build()) + .build(); for (org.deeplearning4j.nn.api.Layer l : modelNow.getLayers()) { BaseLayer bl = ((BaseLayer) l.conf().getLayer()); - assertEquals(Updater.RMSPROP, bl.getUpdater()); - assertEquals(0.5, bl.getLearningRate(), 1e-6); + assertEquals(new RmsProp(0.5), bl.getIUpdater()); } NeuralNetConfiguration.Builder confSet = new NeuralNetConfiguration.Builder().seed(rng) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.RMSPROP) - .learningRate(0.5).l2(0.4).regularization(true); + 
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .updater(new RmsProp(0.5)).l2(0.4); MultiLayerNetwork expectedModel = new MultiLayerNetwork(confSet.list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) @@ -94,10 +96,8 @@ public void simpleFineTune() { public void testNoutChanges() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 2)); - NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().learningRate(0.1) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD); - FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().learningRate(0.1) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) + NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)); + FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)) .build(); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(equivalentConf.list() @@ -155,11 +155,8 @@ public void testNoutChanges() { public void testRemoveAndAdd() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); - NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().learningRate(0.1) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD); - FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().learningRate(0.1) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) - .build(); + NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)); + FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build(); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(//overallConf.list() equivalentConf.list().layer(0, new DenseLayer.Builder().nIn(4).nOut(5).build()) @@ -177,7 +174,7 @@ public void testRemoveAndAdd() { .nOutReplace(0, 7, WeightInit.XAVIER, WeightInit.XAVIER) .nOutReplace(2, 5, WeightInit.XAVIER).removeOutputLayer() .addLayer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5) - .nOut(3).learningRate(0.5).activation(Activation.SOFTMAX) + .nOut(3).updater(new Sgd(0.5)).activation(Activation.SOFTMAX) .build()) .build(); @@ -187,7 +184,7 @@ public void testRemoveAndAdd() { .layer(2, new DenseLayer.Builder().nIn(2).nOut(5).build()) .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) - .learningRate(0.5).nIn(5).nOut(3).build()) + .updater(new Sgd(0.5)).nIn(5).nOut(3).build()) .build()); modelExpectedArch.init(); @@ -214,34 +211,29 @@ public void testRemoveAndProcessing() { int V_NFRAMES = 150; MultiLayerConfiguration confForArchitecture = - new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l2(0.001) //l2 regularization on all layers + new NeuralNetConfiguration.Builder().seed(12345).l2(0.001) //l2 regularization on all layers .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .iterations(1).learningRate(0.4).list() + .iterations(1).updater(new AdaGrad(0.4)).list() .layer(0, new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB .nOut(30).stride(4, 4).activation(Activation.RELU).weightInit( - WeightInit.RELU) - .updater(Updater.ADAGRAD).build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30 + WeightInit.RELU).build()) //Output: (130-10+0)/4+1 = 31 -> 
31*31*30 .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(3, 3).stride(2, 2).build()) //(31-3+0)/2+1 = 15 .layer(2, new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2) .activation(Activation.RELU).weightInit(WeightInit.RELU) - .updater(Updater.ADAGRAD).build()) //Output: (15-3+0)/2+1 = 7 -> 7*7*10 = 490 + .build()) //Output: (15-3+0)/2+1 = 7 -> 7*7*10 = 490 .layer(3, new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50) - .weightInit(WeightInit.RELU).updater(Updater.ADAGRAD) - .gradientNormalization( - GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(10).learningRate(0.5).build()) + .weightInit(WeightInit.RELU).updater(new AdaGrad(0.5)) + .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) + .gradientNormalizationThreshold(10).build()) .layer(4, new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50) - .nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD) - .gradientNormalization( - GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(10).learningRate(0.6) - .build()) + .nOut(50).weightInit(WeightInit.XAVIER).updater(new AdaGrad(0.6)) + .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) + .gradientNormalizationThreshold(10).build()) .layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(50).nOut(4) //4 possible shapes: circle, square, arc, line - .updater(Updater.ADAGRAD).weightInit(WeightInit.XAVIER) - .gradientNormalization( - GradientNormalization.ClipElementWiseAbsoluteValue) + .weightInit(WeightInit.XAVIER) + .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10).build()) .inputPreProcessor(0, new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3)) .inputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10)) @@ -254,18 +246,14 @@ public void testRemoveAndProcessing() { MultiLayerNetwork modelToTweak = new MultiLayerNetwork( new NeuralNetConfiguration.Builder().seed(12345) - //.regularization(true).l2(0.001) //change l2 - .optimizationAlgo( - OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .iterations(1).learningRate(0.1) //change learning rate - .updater(Updater.RMSPROP)// change updater + .iterations(1).updater(new RmsProp(0.1)) .list() .layer(0, new ConvolutionLayer.Builder(10, 10) //Only keep the first layer the same .nIn(3) //3 channels: RGB .nOut(30).stride(4, 4) .activation(Activation.RELU) .weightInit(WeightInit.RELU) - .updater(Updater.ADAGRAD).build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30 + .updater(new AdaGrad(0.1)).build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30 .layer(1, new SubsamplingLayer.Builder( SubsamplingLayer.PoolingType.MAX) //change kernel size .kernelSize(5, 5).stride(2, 2) @@ -280,7 +268,7 @@ public void testRemoveAndProcessing() { .gradientNormalization( GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10) - .learningRate(0.01).build()) + .updater(new RmsProp(0.01)).build()) .layer(4, new GravesLSTM.Builder() //change here .activation(Activation.SOFTSIGN).nIn(50) .nOut(25).weightInit(WeightInit.XAVIER) @@ -290,15 +278,11 @@ public void testRemoveAndProcessing() { .activation(Activation.SOFTMAX) .nIn(25).nOut(4) .weightInit(WeightInit.XAVIER) - .gradientNormalization( - GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold( - 10) + 
.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) + .gradientNormalizationThreshold(10) .build()) - .inputPreProcessor(0, - new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3)) - .inputPreProcessor(3, - new CnnToFeedForwardPreProcessor(5, 5, 10)) + .inputPreProcessor(0,new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3)) + .inputPreProcessor(3,new CnnToFeedForwardPreProcessor(5, 5, 10)) .inputPreProcessor(4, new FeedForwardToRnnPreProcessor()) .pretrain(false).backprop(true) .backpropType(BackpropType.TruncatedBPTT) @@ -308,28 +292,25 @@ public void testRemoveAndProcessing() { MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToTweak) .fineTuneConfiguration( - new FineTuneConfiguration.Builder().seed(12345).regularization(true).l2(0.001) //l2 regularization on all layers - .optimizationAlgo( - OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(Updater.ADAGRAD).weightInit(WeightInit.RELU) - .iterations(1).learningRate(0.4).build()) + new FineTuneConfiguration.Builder().seed(12345).l2(0.001) //l2 regularization on all layers + .updater(new AdaGrad(0.4)) + .weightInit(WeightInit.RELU).build()) .removeLayersFromOutput(5) .addLayer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3, 3) .stride(2, 2).build()) .addLayer(new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2) - .activation(Activation.RELU).weightInit(WeightInit.RELU) - .updater(Updater.ADAGRAD).build()) + .activation(Activation.RELU).weightInit(WeightInit.RELU).build()) .addLayer(new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50) - .weightInit(WeightInit.RELU).updater(Updater.ADAGRAD) + .weightInit(WeightInit.RELU).updater(new AdaGrad(0.5)) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(10).learningRate(0.5).build()) + .gradientNormalizationThreshold(10).build()) .addLayer(new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50).nOut(50) - .weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD) + .weightInit(WeightInit.XAVIER).updater(new AdaGrad(0.6)) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(10).learningRate(0.6).build()) + .gradientNormalizationThreshold(10).build()) .addLayer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(50).nOut(4) //4 possible shapes: circle, square, arc, line - .updater(Updater.ADAGRAD).weightInit(WeightInit.XAVIER) + .weightInit(WeightInit.XAVIER) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10).build()) .setInputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10)) @@ -366,12 +347,9 @@ public void testAllWithCNN() { DataSet randomData = new DataSet(Nd4j.rand(10, 28 * 28 * 3).reshape(10, 3, 28, 28), Nd4j.rand(10, 10)); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork( - new NeuralNetConfiguration.Builder().seed(123).iterations(1).learningRate(.01) + new NeuralNetConfiguration.Builder().seed(123) .weightInit(WeightInit.XAVIER) - .optimizationAlgo( - OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(Updater.NESTEROVS).momentum( - 0.9) + .updater(new Nesterovs(0.01, 0.9)) .list() .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(3).stride(1, 1) .nOut(20).activation(Activation.IDENTITY) @@ -401,11 +379,10 @@ public void testAllWithCNN() { modelToFineTune.init(); INDArray asFrozenFeatures = modelToFineTune.feedForwardToLayer(2, 
randomData.getFeatures(), false).get(2); //10x20x12x12
-        NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().learningRate(0.2)
-                        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
+        NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.2))
+                        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
-        FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().learningRate(0.2)
-                        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
+        FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.2))
                         .build();
         MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToFineTune).fineTuneConfiguration(overallConf)
@@ -466,8 +443,8 @@ public void testFineTuneOverride() {
         //Check that fine-tune overrides are selective - i.e., if I only specify a new LR, only the LR should be modified
         MultiLayerConfiguration conf =
-                        new NeuralNetConfiguration.Builder().learningRate(1e-4).updater(Updater.ADAM)
-                                        .activation(Activation.TANH).weightInit(WeightInit.RELU).regularization(true)
+                        new NeuralNetConfiguration.Builder().updater(new Adam(1e-4))
+                                        .activation(Activation.TANH).weightInit(WeightInit.RELU)
                                         .l1(0.1).l2(0.2).list()
                                         .layer(0, new DenseLayer.Builder().nIn(10).nOut(5).build()).layer(1,
                                                         new OutputLayer.Builder().nIn(5).nOut(4)
@@ -478,7 +455,7 @@ public void testFineTuneOverride() {
         net.init();
         MultiLayerNetwork net2 = new TransferLearning.Builder(net)
-                        .fineTuneConfiguration(new FineTuneConfiguration.Builder().learningRate(2e-2) //Should be set on layers
+                        .fineTuneConfiguration(new FineTuneConfiguration.Builder().updater(new Adam(2e-2))
                                         .backpropType(BackpropType.TruncatedBPTT) //Should be set on MLC
                                         .build())
                         .build();
@@ -486,16 +463,14 @@ public void testFineTuneOverride() {
         //Check original net isn't modified:
         BaseLayer l0 = (BaseLayer) net.getLayer(0).conf().getLayer();
-        assertEquals(Updater.ADAM, l0.getUpdater());
+        assertEquals(new Adam(1e-4), l0.getIUpdater());
         assertEquals(Activation.TANH.getActivationFunction(), l0.getActivationFn());
-        assertEquals(1e-4, l0.getLearningRate(), 1e-8);
         assertEquals(WeightInit.RELU, l0.getWeightInit());
         assertEquals(0.1, l0.getL1(), 1e-6);
         BaseLayer l1 = (BaseLayer) net.getLayer(1).conf().getLayer();
-        assertEquals(Updater.ADAM, l1.getUpdater());
+        assertEquals(new Adam(1e-4), l1.getIUpdater());
         assertEquals(Activation.HARDSIGMOID.getActivationFunction(), l1.getActivationFn());
-        assertEquals(1e-4, l1.getLearningRate(), 1e-8);
         assertEquals(WeightInit.RELU, l1.getWeightInit());
         assertEquals(0.2, l1.getL2(), 1e-6);
@@ -503,16 +478,14 @@ public void testFineTuneOverride() {
         //Check new net has only the appropriate things modified (i.e., LR)
         l0 = (BaseLayer) net2.getLayer(0).conf().getLayer();
-        assertEquals(Updater.ADAM, l0.getUpdater());
+        assertEquals(new Adam(2e-2), l0.getIUpdater());
         assertEquals(Activation.TANH.getActivationFunction(), l0.getActivationFn());
-        assertEquals(2e-2, l0.getLearningRate(), 1e-8);
         assertEquals(WeightInit.RELU, l0.getWeightInit());
         assertEquals(0.1, l0.getL1(), 1e-6);
         l1 = (BaseLayer) net2.getLayer(1).conf().getLayer();
-        assertEquals(Updater.ADAM, l1.getUpdater());
+        assertEquals(new Adam(2e-2), l1.getIUpdater());
         assertEquals(Activation.HARDSIGMOID.getActivationFunction(), l1.getActivationFn());
-        assertEquals(2e-2, l1.getLearningRate(), 1e-8);
         assertEquals(WeightInit.RELU, l1.getWeightInit());
         assertEquals(0.2, 
l1.getL2(), 1e-6); @@ -525,12 +498,9 @@ public void testAllWithCNNNew() { DataSet randomData = new DataSet(Nd4j.rand(10, 28 * 28 * 3).reshape(10, 3, 28, 28), Nd4j.rand(10, 10)); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork( - new NeuralNetConfiguration.Builder().seed(123).iterations(1).learningRate(.01) + new NeuralNetConfiguration.Builder().seed(123).iterations(1) .weightInit(WeightInit.XAVIER) - .optimizationAlgo( - OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(Updater.NESTEROVS).momentum( - 0.9) + .updater(new Nesterovs(0.01, 0.9)) .list() .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(3).stride(1, 1) .nOut(20).activation(Activation.IDENTITY) @@ -560,11 +530,8 @@ public void testAllWithCNNNew() { modelToFineTune.init(); INDArray asFrozenFeatures = modelToFineTune.feedForwardToLayer(2, randomData.getFeatures(), false).get(2); //10x20x12x12 - NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().learningRate(0.2) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD); - FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().learningRate(0.2) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD) - .build(); + NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.2)); + FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.2)).build(); MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToFineTune).fineTuneConfiguration(overallConf) .setFeatureExtractor(1).removeLayersFromOutput(5) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestDecayPolicies.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestDecayPolicies.java deleted file mode 100644 index 5d35059eca57..000000000000 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestDecayPolicies.java +++ /dev/null @@ -1,844 +0,0 @@ -package org.deeplearning4j.nn.updater; - -import org.apache.commons.math3.util.FastMath; -import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; -import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; -import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.api.Updater; -import org.deeplearning4j.nn.conf.LearningRatePolicy; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.distribution.NormalDistribution; -import org.deeplearning4j.nn.conf.layers.BaseLayer; -import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.conf.layers.OutputLayer; -import org.deeplearning4j.nn.gradient.DefaultGradient; -import org.deeplearning4j.nn.gradient.Gradient; -import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.nn.params.DefaultParamInitializer; -import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.optimize.api.ConvexOptimizer; -import org.deeplearning4j.optimize.solvers.StochasticGradientDescent; -import org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction; -import org.junit.Before; -import org.junit.Test; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.dataset.DataSet; -import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -import 
org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.linalg.learning.config.AdaGrad; -import org.nd4j.linalg.learning.config.Adam; -import org.nd4j.linalg.learning.config.Nesterovs; -import org.nd4j.linalg.learning.config.RmsProp; -import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.nd4j.linalg.ops.transforms.Transforms; - -import java.util.HashMap; -import java.util.Map; - -import static org.junit.Assert.assertEquals; - -/** - * Test learning rate and momentum decay policies - */ - - -public class TestDecayPolicies { - - int nIn = 3; - int nOut = 2; - double epsilon = 1e-8; - INDArray gradient; - INDArray weightGradient; // = Nd4j.ones(nIn, nOut); - INDArray biasGradient; // = Nd4j.ones(1, nOut); - DefaultGradient gradientSingle = new DefaultGradient(); - DefaultGradient gradientMLN = new DefaultGradient(); - INDArray val, gradExpected, vPrev; - String key; - Map tmpStorage, tmpStorage2, tmpStorage3, tmpStorage4 = new HashMap<>(); - org.deeplearning4j.nn.conf.Updater[] updaters = {org.deeplearning4j.nn.conf.Updater.SGD, - org.deeplearning4j.nn.conf.Updater.ADAGRAD, org.deeplearning4j.nn.conf.Updater.ADAM, - org.deeplearning4j.nn.conf.Updater.RMSPROP, org.deeplearning4j.nn.conf.Updater.ADAMAX}; - - @Before - public void beforeDo() { - Nd4j.getRandom().setSeed(12345); - int nLayers = 2; - String wKey, bKey; - - gradient = Nd4j.ones(1, nIn * nOut + nOut); - gradient.addi(Nd4j.rand(gradient.shape())); - weightGradient = gradient.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nIn * nOut)); - biasGradient = gradient.get(NDArrayIndex.point(0), NDArrayIndex.interval(nIn * nOut, nIn * nOut + nOut)); - - gradientSingle.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient); - gradientSingle.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient); - gradientSingle.setFlattenedGradient(gradient); - - for (int j = 0; j < nLayers; j++) { - wKey = String.valueOf(j) + "_" + DefaultParamInitializer.WEIGHT_KEY; - gradientMLN.setGradientFor(wKey, weightGradient.dup()); - bKey = String.valueOf(j) + "_" + DefaultParamInitializer.BIAS_KEY; - gradientMLN.setGradientFor(bKey, biasGradient.dup()); - } - - val = null; - gradExpected = null; - vPrev = null; - tmpStorage = new HashMap<>(); - tmpStorage2 = new HashMap<>(); - tmpStorage3 = new HashMap<>(); - tmpStorage4 = new HashMap<>(); - - } - - @Test - public void testLearningRateExponentialDecaySingleLayer() { - int iterations = 2; - - double lr = 1e-2; - double decayRate = 2; - NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(lr) - .learningRateDecayPolicy(LearningRatePolicy.Exponential) - .lrPolicyDecayRate(decayRate).iterations(iterations) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) - .build(); - - int numParams = conf.getLayer().initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true); - layer.setBackpropGradientsViewArray(Nd4j.create(params.shape())); - Updater updater = UpdaterCreator.getUpdater(layer); - - Gradient gradientActual = new DefaultGradient(); - gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient); - gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient); - for (int i = 0; i < iterations; i++) { - updater.update(layer, gradientActual, i, 1); - double expectedLr = calcExponentialDecay(lr, decayRate, i); - 
assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4); - assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4); - } - } - - - @Test - public void testLearningRateInverseDecaySingleLayer() { - int iterations = 2; - - double lr = 1e-2; - double decayRate = 2; - double power = 3; - NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(lr) - .learningRateDecayPolicy(LearningRatePolicy.Inverse) - .lrPolicyDecayRate(decayRate).lrPolicyPower(power).iterations(iterations) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) - .build(); - - int numParams = conf.getLayer().initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true); - layer.setBackpropGradientsViewArray(Nd4j.create(params.shape())); - Updater updater = UpdaterCreator.getUpdater(layer); - - Gradient gradientActual = new DefaultGradient(); - gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient); - gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient); - - for (int i = 0; i < iterations; i++) { - updater.update(layer, gradientActual, i, 1); - double expectedLr = calcInverseDecay(lr, decayRate, i, power); - assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4); - assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4); - } - } - - @Test - public void testLearningRateStepDecaySingleLayer() { - int iterations = 2; - - double lr = 1e-2; - double decayRate = 2; - double steps = 3; - NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(lr) - .learningRateDecayPolicy(LearningRatePolicy.Step).lrPolicyDecayRate(decayRate) - .lrPolicySteps(steps).iterations(iterations) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) - .build(); - - int numParams = conf.getLayer().initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true); - layer.setBackpropGradientsViewArray(Nd4j.create(params.shape())); - Updater updater = UpdaterCreator.getUpdater(layer); - - Gradient gradientActual = new DefaultGradient(); - gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient); - gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient); - - for (int i = 0; i < iterations; i++) { - updater.update(layer, gradientActual, i, 1); - double expectedLr = calcStepDecay(lr, decayRate, i, steps); - assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4); - assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4); - } - } - - - @Test - public void testLearningRateTorchStepDecaySingleLayer() { - int iterations = 20; - - double lr = 1; - double decayRate = .5; - double steps = 10; - NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(lr) - .learningRateDecayPolicy(LearningRatePolicy.TorchStep) - .lrPolicyDecayRate(decayRate).lrPolicySteps(steps).iterations(iterations) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) - .build(); - - int numParams = conf.getLayer().initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true); - 
layer.setBackpropGradientsViewArray(Nd4j.create(params.shape())); - Updater updater = UpdaterCreator.getUpdater(layer); - - Gradient gradientActual = new DefaultGradient(); - gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient); - gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient); - - double expectedLr = lr; - for (int i = 0; i < iterations; i++) { - updater.update(layer, gradientActual, i, 1); - if (i > 1 && steps % i == 0) - expectedLr = calcTorchStepDecay(expectedLr, decayRate); - assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4); - assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4); - } - } - - @Test - public void testLearningRatePolyDecaySingleLayer() { - int iterations = 2; - double lr = 1e-2; - double power = 3; - NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(lr) - .learningRateDecayPolicy(LearningRatePolicy.Poly).lrPolicyPower(power) - .iterations(iterations) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) - .build(); - - int numParams = conf.getLayer().initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true); - layer.setBackpropGradientsViewArray(Nd4j.create(params.shape())); - Updater updater = UpdaterCreator.getUpdater(layer); - - Gradient gradientActual = new DefaultGradient(); - gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient); - gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient); - - for (int i = 0; i < iterations; i++) { - updater.update(layer, gradientActual, i, 1); - double expectedLr = calcPolyDecay(lr, i, power, iterations); - assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4); - assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4); - } - } - - - @Test - public void testLearningRateSigmoidDecaySingleLayer() { - int iterations = 2; - double lr = 1e-2; - double decayRate = 2; - double steps = 3; - - NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(lr) - .learningRateDecayPolicy(LearningRatePolicy.Sigmoid) - .lrPolicyDecayRate(decayRate).lrPolicySteps(steps).iterations(iterations) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) - .build(); - - int numParams = conf.getLayer().initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true); - layer.setBackpropGradientsViewArray(Nd4j.create(params.shape())); - Updater updater = UpdaterCreator.getUpdater(layer); - - Gradient gradientActual = new DefaultGradient(); - gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient); - gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient); - - for (int i = 0; i < iterations; i++) { - updater.update(layer, gradientActual, i, 1); - double expectedLr = calcSigmoidDecay(layer.conf().getLearningRateByParam("W"), decayRate, i, steps); - assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4); - assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4); - } - } - - - @Test - public void testLearningRateScheduleSingleLayer() { - Map learningRateAfter = new HashMap<>(); - learningRateAfter.put(1, 0.2); - int iterations = 2; - - for 
(org.deeplearning4j.nn.conf.Updater updaterFunc : updaters) { - beforeDo(); - - gradient.assign(1); - - double lr = 1e-2; - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr) - .learningRateSchedule(learningRateAfter) - .learningRateDecayPolicy(LearningRatePolicy.Schedule).iterations(iterations) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).updater(updaterFunc).build()).build(); - - int numParams = conf.getLayer().initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true); - layer.setBackpropGradientsViewArray(gradient); - Updater updater = UpdaterCreator.getUpdater(layer); - int stateSize = (int) ((BaseLayer) layer.conf().getLayer()).getIUpdater().stateSize(numParams); - if (stateSize > 0) - updater.setStateViewArray(layer, Nd4j.create(1, stateSize), true); - - Gradient gradientActual = new DefaultGradient(gradient); - gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient); - gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient); - - Gradient gradientExpected = new DefaultGradient(); - gradientExpected.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup()); - gradientExpected.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup()); - - for (int i = 0; i < 2; i++) { - updater.update(layer, gradientActual, i, 1); - - if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.SGD)) - lr = testSGDComputation(gradientActual, gradientExpected, lr, learningRateAfter, i); - else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAGRAD)) - lr = testAdaGradComputation(gradientActual, gradientExpected, lr, learningRateAfter, i); - else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAM)) - lr = testAdamComputation(gradientActual, gradientExpected, lr, learningRateAfter, i); - else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.RMSPROP)) - lr = testRMSPropComputation(gradientActual, gradientExpected, lr, learningRateAfter, i); - else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAMAX)) - lr = testAdaMaxComputation(gradientActual, gradientExpected, lr, learningRateAfter, i); - assertEquals(lr, layer.conf().getLearningRateByParam("W"), 1e-4); - } - } - } - - - @Test - public void testLearningRateScheduleMLN() { - Map learningRateAfter = new HashMap<>(); - learningRateAfter.put(1, 0.2); - int iterations = 2; - int[] nIns = {4, 2}; - int[] nOuts = {2, 3}; - - for (org.deeplearning4j.nn.conf.Updater updaterFunc : updaters) { - beforeDo(); - - double lr = 1e-2; - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr) - .learningRateDecayPolicy(LearningRatePolicy.Schedule) - .learningRateSchedule(learningRateAfter).iterations(iterations).updater(updaterFunc).list() - .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]).build()) - .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1]).build()).backprop(true) - .pretrain(false).build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - Updater updater = UpdaterCreator.getUpdater(net); - - INDArray gradViewArr = net.getFlattenedGradients(); - - String wKey, bKey; - - for (int i = 0; i < 2; i++) { - Gradient gradientActual = new DefaultGradient(); - Gradient gradientExpected = new DefaultGradient(); - int paramsSoFar = 0; - for (int k = 0; k < net.getnLayers(); k++) { - int nParams = net.getLayer(k).numParams(); - INDArray g = 
gradViewArr.get(NDArrayIndex.point(0), - NDArrayIndex.interval(paramsSoFar, paramsSoFar + nParams)); - int nW = nIns[k] * nOuts[k]; - int nB = nOuts[k]; - INDArray gw = g.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nW)); - INDArray gb = g.get(NDArrayIndex.point(0), NDArrayIndex.interval(nW, nW + nB)); - wKey = String.valueOf(k) + "_" + DefaultParamInitializer.WEIGHT_KEY; - gradientActual.setGradientFor(wKey, gw); - gradientExpected.setGradientFor(wKey, gw.dup()); - bKey = String.valueOf(k) + "_" + DefaultParamInitializer.BIAS_KEY; - gradientActual.setGradientFor(bKey, gb); - gradientExpected.setGradientFor(bKey, gb.dup()); - - paramsSoFar += nParams; - } - - updater.update(net, gradientActual, i, 1); - if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.SGD)) - lr = testSGDComputation(gradientActual, gradientExpected, lr, learningRateAfter, i); - else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAGRAD)) - lr = testAdaGradComputation(gradientActual, gradientExpected, lr, learningRateAfter, i); - else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAM)) - lr = testAdamComputation(gradientActual, gradientExpected, lr, learningRateAfter, i); - else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.RMSPROP)) - lr = testRMSPropComputation(gradientActual, gradientExpected, lr, learningRateAfter, i); - - if (i == 0) - assertEquals(lr, net.getLayer(1).conf().getLearningRateByParam("W"), lr); - else - assertEquals(lr, net.getLayer(1).conf().getLearningRateByParam("W"), learningRateAfter.get(1)); - } - } - } - - @Test - public void testLearningRateScoreDecay() { - double lr = 0.01; - double lrScoreDecay = 0.10; - int[] nIns = {4, 2}; - int[] nOuts = {2, 3}; - int oldScore = 1; - int newScore = 1; - int iteration = 3; - INDArray gradientW = Nd4j.ones(nIns[0], nOuts[0]); - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr) - .learningRateDecayPolicy(LearningRatePolicy.Score).lrPolicyDecayRate(lrScoreDecay).list() - .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) - .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1]) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) - .backprop(true).pretrain(false).build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - ConvexOptimizer opt = new StochasticGradientDescent(net.getDefaultConfiguration(), - new NegativeDefaultStepFunction(), null, net); - opt.checkTerminalConditions(gradientW, oldScore, newScore, iteration); - assertEquals(lrScoreDecay, net.getLayer(0).conf().getLrPolicyDecayRate(), 1e-4); - assertEquals(lr * (lrScoreDecay + Nd4j.EPS_THRESHOLD), net.getLayer(0).conf().getLearningRateByParam("W"), - 1e-4); - - } - - @Test - public void testOriginalLearningRateUnchanged() { - // Confirm learning rate is unchanged while hash is updated - - DataSet ds = new IrisDataSetIterator(150, 150).next(); - ds.normalizeZeroMeanZeroUnitVariance(); - - Nd4j.getRandom().setSeed(12345); - - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().regularization(false) - .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).learningRate(1.0) - .learningRateDecayPolicy(LearningRatePolicy.Score).lrPolicyDecayRate(0.10) - .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).seed(12345L).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.SIGMOID) - .build()) 
- .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE) - .activation(Activation.TANH).nIn(3).nOut(3).build()) - .pretrain(false).backprop(true).build(); - MultiLayerNetwork mln = new MultiLayerNetwork(conf); - mln.init(); - - //Run a number of iterations of learning - mln.setInput(ds.getFeatureMatrix()); - mln.setLabels(ds.getLabels()); - mln.computeGradientAndScore(); - for (int j = 0; j < 1; j++) - mln.fit(ds); - mln.computeGradientAndScore(); - - double lr0 = ((BaseLayer) mln.getLayer(0).conf().getLayer()).getLearningRate(); - double lr1 = ((BaseLayer) mln.getLayer(1).conf().getLayer()).getLearningRate(); - assertEquals(1.0, lr0, 0.0); - assertEquals(1.0, lr1, 0.0); - } - - @Test - public void testMomentumScheduleSingleLayer() { - double lr = 1e-2; - double mu = 0.9; - Map momentumAfter = new HashMap<>(); - momentumAfter.put(1, 0.2); - int iterations = 2; - - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(mu) - .momentumAfter(momentumAfter).iterations(iterations).layer(new DenseLayer.Builder().nIn(nIn) - .nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build()) - .build(); - - int numParams = conf.getLayer().initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true); - layer.setBackpropGradientsViewArray(gradient); - Updater updater = UpdaterCreator.getUpdater(layer); - - Gradient gradientExpected = new DefaultGradient(); - gradientExpected.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup()); - gradientExpected.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup()); - - for (int i = 0; i < 2; i++) { - updater.update(layer, gradientSingle, i, 1); - mu = testNesterovsComputation(gradientSingle, gradientExpected, lr, mu, momentumAfter, i); - assertEquals(mu, ((BaseLayer) layer.conf().getLayer()).getMomentum(), 1e-4); - } - } - - @Test - public void testMomentumScheduleMLN() { - double lr = 1e-2; - double mu = 0.6; - Map momentumAfter = new HashMap<>(); - momentumAfter.put(1, 0.2); - int iterations = 2; - int[] nIns = {4, 2}; - int[] nOuts = {2, 3}; - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(mu) - .momentumAfter(momentumAfter).iterations(iterations).list() - .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]) - .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build()) - .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1]) - .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build()) - .backprop(true).pretrain(false).build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - Updater updater = UpdaterCreator.getUpdater(net); - int stateSize = (int) new Nesterovs().stateSize(net.numParams()); - updater.setStateViewArray(net, Nd4j.create(1, stateSize), true); - - String wKey, bKey; - - Gradient gradientMLN = new DefaultGradient(); - INDArray gradViewArr = net.getGradientsViewArray(); - int paramsSoFar = 0; - for (int j = 0; j < 2; j++) { - int nParams = net.getLayer(j).numParams(); - INDArray g = gradViewArr.get(NDArrayIndex.point(0), - NDArrayIndex.interval(paramsSoFar, paramsSoFar + nParams)); - int nW = nIns[j] * nOuts[j]; - int nB = nOuts[j]; - INDArray gw = g.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nW)); - INDArray gb = g.get(NDArrayIndex.point(0), NDArrayIndex.interval(nW, nW + nB)); - wKey = String.valueOf(j) + "_" + DefaultParamInitializer.WEIGHT_KEY; - 
gradientMLN.setGradientFor(wKey, gw); - bKey = String.valueOf(j) + "_" + DefaultParamInitializer.BIAS_KEY; - gradientMLN.setGradientFor(bKey, gb); - paramsSoFar += nParams; - } - - Gradient gradientExpected = new DefaultGradient(); - gradViewArr = gradViewArr.dup(); - paramsSoFar = 0; - for (int j = 0; j < net.getnLayers(); j++) { - int nParams = net.getLayer(j).numParams(); - INDArray g = gradViewArr.get(NDArrayIndex.point(0), - NDArrayIndex.interval(paramsSoFar, paramsSoFar + nParams)); - int nW = nIns[j] * nOuts[j]; - int nB = nOuts[j]; - INDArray gw = g.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nW)); - INDArray gb = g.get(NDArrayIndex.point(0), NDArrayIndex.interval(nW, nW + nB)); - wKey = String.valueOf(j) + "_" + DefaultParamInitializer.WEIGHT_KEY; - gradientExpected.setGradientFor(wKey, gw); - bKey = String.valueOf(j) + "_" + DefaultParamInitializer.BIAS_KEY; - gradientExpected.setGradientFor(bKey, gb); - } - - - - for (int i = 0; i < 2; i++) { - updater.update(net, gradientMLN, i, 1); - mu = testNesterovsComputation(gradientMLN, gradientExpected, lr, mu, momentumAfter, i); - assertEquals(mu, ((BaseLayer) net.getLayer(1).conf().getLayer()).getMomentum(), 1e-4); - } - } - - - @Test - public void testUpdatingInConf() throws Exception { - - OptimizationAlgorithm[] optimizationAlgorithms = new OptimizationAlgorithm[] { - OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT, OptimizationAlgorithm.LINE_GRADIENT_DESCENT, - OptimizationAlgorithm.CONJUGATE_GRADIENT, OptimizationAlgorithm.LBFGS}; - - for (OptimizationAlgorithm oa : optimizationAlgorithms) { - Map momentumSchedule = new HashMap<>(); - double m = 0.001; - for (int i = 0; i <= 100; i++) { - momentumSchedule.put(i, Math.min(m, 0.9999)); - m += 0.001; - } - - Map learningRateSchedule = new HashMap<>(); - double lr = 0.1; - for (int i = 0; i <= 100; i++) { - learningRateSchedule.put(i, lr); - lr *= 0.96; - } - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().optimizationAlgo(oa).iterations(1) - .learningRateDecayPolicy(LearningRatePolicy.Schedule) - .learningRateSchedule(learningRateSchedule) - .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).weightInit(WeightInit.XAVIER) - .momentum(0.9).momentumAfter(momentumSchedule).regularization(true).l2(0.0001).list() - .layer(0, new DenseLayer.Builder().nIn(784).nOut(10).build()) - .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).build()).pretrain(false).backprop(true) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - int last_layer_index = 1; - - DataSetIterator trainIter = new MnistDataSetIterator(64, true, 12345); - - - int count = 0; - while (trainIter.hasNext()) { - net.fit(trainIter.next()); - - // always print the same number (0.1 and 0.9) - double lrLastLayer = (net.getLayer(last_layer_index)).conf().getLearningRateByParam("W"); - double mLastLayer = ((BaseLayer) (net.getLayer(last_layer_index)).conf().getLayer()).getMomentum(); - - assertEquals(learningRateSchedule.get(count), lrLastLayer, 1e-6); - assertEquals(momentumSchedule.get(count), mLastLayer, 1e-6); - - if (count++ >= 100) - break; - } - } - } - - ///// Updater Calculations - - public double testSGDComputation(Gradient gradientActual, Gradient gradientExpected, double lr, - Map learningRateAfter, int i) { - for (Map.Entry entry : gradientExpected.gradientForVariable().entrySet()) { - if (learningRateAfter != null) - lr = (learningRateAfter.containsKey(i)) ? 
learningRateAfter.get(i) : lr; - key = entry.getKey(); - val = entry.getValue(); - gradExpected = val.mul(lr); - gradientExpected.setGradientFor(key, gradExpected); - INDArray act = gradientActual.getGradientFor(key); - assertEquals(gradExpected, act); - } - return lr; - } - - public double testNesterovsComputation(Gradient gradientActual, Gradient gradientExpected, double lr, double mu, - Map momentumAfter, int i) { - - for (Map.Entry entry : gradientExpected.gradientForVariable().entrySet()) { - if (momentumAfter != null) - mu = (momentumAfter.containsKey(i)) ? momentumAfter.get(i) : mu; - key = entry.getKey(); - val = entry.getValue(); - INDArray vTmp = tmpStorage.get(key); - - if (vTmp == null) - vTmp = Nd4j.zeros(val.shape()); - vPrev = vTmp; - vTmp = vPrev.mul(mu).subi(val.mul(lr)); - gradExpected = vPrev.muli(mu).addi(vTmp.mul(-mu - 1)); - gradientExpected.setGradientFor(key, gradExpected); - - INDArray act = gradientActual.getGradientFor(entry.getKey()); - assertEquals(gradExpected, act); - tmpStorage.put(key, vTmp); - } - return mu; - } - - - public double testAdaGradComputation(Gradient gradientActual, Gradient gradientExpected, double lr, - Map learningRateAfter, int i) { - - double epsilon = AdaGrad.DEFAULT_ADAGRAD_EPSILON; - - for (Map.Entry entry : gradientExpected.gradientForVariable().entrySet()) { - if (learningRateAfter != null) - lr = (learningRateAfter.containsKey(i)) ? learningRateAfter.get(i) : lr; - key = entry.getKey(); - val = entry.getValue(); - INDArray historicalGradient = tmpStorage.get(key); - - if (historicalGradient == null) - historicalGradient = val.mul(val); - else - historicalGradient.addi(val.mul(val)); - - gradExpected = Transforms.sqrt(historicalGradient.add(epsilon)).rdiv(lr).mul(val); - assertEquals(gradExpected, gradientActual.getGradientFor(key)); - gradientExpected.setGradientFor(key, gradExpected); - tmpStorage.put(key, historicalGradient); - } - - return lr; - } - - public double testAdamComputation(Gradient gradientActual, Gradient gradientExpected, double lr, - Map learningRateAfter, int i) { - double beta1 = 0.9; - double beta2 = 0.999; - double epsilon = Adam.DEFAULT_ADAM_EPSILON; - - for (Map.Entry entry : gradientExpected.gradientForVariable().entrySet()) { - if (learningRateAfter != null) - lr = (learningRateAfter.containsKey(i)) ? learningRateAfter.get(i) : lr; - key = entry.getKey(); - val = entry.getValue(); - - INDArray mTmp = tmpStorage2.get(key); - INDArray vTmp = tmpStorage3.get(key); - - if (mTmp == null) - mTmp = Nd4j.zeros(val.shape()); - if (vTmp == null) - vTmp = Nd4j.zeros(val.shape()); - - mTmp.muli(beta1).addi(val.mul(1.0 - beta1)); - vTmp.muli(beta2).addi(val.mul(val).mul(1.0 - beta2)); - - double beta1t = FastMath.pow(beta1, i + 1); - double beta2t = FastMath.pow(beta2, i + 1); - double alphat = lr * FastMath.sqrt(1 - beta2t) / (1 - beta1t); - if (Double.isNaN(alphat) || alphat == 0.0) - alphat = epsilon; - - gradExpected = mTmp.mul(alphat).divi(Transforms.sqrt(vTmp).addi(epsilon)); - gradientExpected.setGradientFor(key, gradExpected); - assertEquals(gradExpected, gradientActual.getGradientFor(key)); - - tmpStorage2.put(key, mTmp); - tmpStorage3.put(key, vTmp); - } - return lr; - } - - public double testAdaMaxComputation(Gradient gradientActual, Gradient gradientExpected, double lr, - Map learningRateAfter, int i) { - - double beta1 = 0.9; - double beta2 = 0.999; - - for (Map.Entry entry : gradientExpected.gradientForVariable().entrySet()) { - if (learningRateAfter != null) - lr = (learningRateAfter.containsKey(i)) ? 
learningRateAfter.get(i) : lr; - key = entry.getKey(); - val = entry.getValue(); - - INDArray mTmp = tmpStorage2.get(key); - INDArray uTmp = tmpStorage3.get(key); - - if (mTmp == null) - mTmp = Nd4j.zeros(val.shape()); - if (uTmp == null) - uTmp = Nd4j.zeros(val.shape()); - - mTmp.muli(beta1).addi(val.mul(1.0 - beta1)); - uTmp.assign(Transforms.max(uTmp.mul(beta2), Transforms.abs(val))); - - double beta1t = FastMath.pow(beta1, i + 1); - double alphat = lr / (1 - beta1t); - if (Double.isNaN(alphat) || alphat == 0.0) - alphat = epsilon; - - gradExpected = mTmp.mul(alphat).divi(uTmp); - gradientExpected.setGradientFor(key, gradExpected); - assertEquals(gradExpected, gradientActual.getGradientFor(key)); - - tmpStorage2.put(key, mTmp); - tmpStorage3.put(key, uTmp); - } - return lr; - } - - public double testRMSPropComputation(Gradient gradientActual, Gradient gradientExpected, double lr, - Map learningRateAfter, int i) { - double rmsDecay = RmsProp.DEFAULT_RMSPROP_RMSDECAY; - double epsilon = RmsProp.DEFAULT_RMSPROP_EPSILON; - - for (Map.Entry entry : gradientExpected.gradientForVariable().entrySet()) { - if (learningRateAfter != null) - lr = (learningRateAfter.containsKey(i)) ? learningRateAfter.get(i) : lr; - key = entry.getKey(); - val = entry.getValue(); - INDArray lastGTmp = tmpStorage4.get(key); - - if (lastGTmp == null) - lastGTmp = Nd4j.valueArrayOf(val.shape(), epsilon); - - lastGTmp.muli(rmsDecay).addi(val.mul(val).muli(1 - rmsDecay)); - gradExpected = val.mul(lr).div(Transforms.sqrt(lastGTmp.add(epsilon))); - gradientExpected.setGradientFor(key, gradExpected); - - assertEquals(gradExpected, gradientActual.getGradientFor(key)); - tmpStorage4.put(key, lastGTmp); - } - - return lr; - } - - ///// Learning Rate Decay Policy Calculations - - public double calcExponentialDecay(double lr, double decayRate, double iteration) { - return lr * Math.pow(decayRate, iteration); - } - - public double calcInverseDecay(double lr, double decayRate, double iteration, double power) { - return lr / Math.pow((1 + decayRate * iteration), power); - } - - public double calcStepDecay(double lr, double decayRate, double iteration, double steps) { - return lr * Math.pow(decayRate, Math.floor(iteration / steps)); - } - - public double calcTorchStepDecay(double lr, double decayRate) { - return lr * decayRate; - } - - public double calcPolyDecay(double lr, double iteration, double power, double maxIterations) { - return lr * Math.pow((1 - iteration / maxIterations), power); - } - - public double calcSigmoidDecay(double lr, double decayRate, double iteration, double steps) { - return lr / (1 + Math.exp(-decayRate * (iteration - steps))); - } - -} - diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java index 27d3c2e0b99b..337d8726ec5e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java @@ -13,6 +13,7 @@ import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.NoOp; import static org.junit.Assert.*; @@ -24,7 +25,7 @@ public void testRenormalizatonPerLayer() { NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() .layer(new DenseLayer.Builder().nIn(10).nOut(20) - 
                                        .updater(org.deeplearning4j.nn.conf.Updater.NONE)
+                                        .updater(new NoOp())
                                         .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).build())
                         .build();
@@ -43,7 +44,7 @@ public void testRenormalizatonPerLayer() {
         gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);
         Updater updater = UpdaterCreator.getUpdater(layer);
-        updater.update(layer, gradient, 0, 1);
+        updater.update(layer, gradient, 0, 0, 1);
         assertNotEquals(weightGradCopy, weightGrad);
         assertNotEquals(biasGradCopy, biasGrad);
@@ -70,7 +71,7 @@ public void testRenormalizationPerParamType() {
         NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                         .layer(new DenseLayer.Builder().nIn(10).nOut(20)
-                                        .updater(org.deeplearning4j.nn.conf.Updater.NONE)
+                                        .updater(new NoOp())
                                         .gradientNormalization(GradientNormalization.RenormalizeL2PerParamType).build())
                         .build();
@@ -87,7 +88,7 @@ public void testRenormalizationPerParamType() {
         gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
         gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);
-        updater.update(layer, gradient, 0, 1);
+        updater.update(layer, gradient, 0, 0, 1);
         INDArray normWeightsExpected = weightGradCopy.div(weightGradCopy.norm2Number());
         INDArray normBiasExpected = biasGradCopy.div(biasGradCopy.norm2Number());
@@ -102,7 +103,7 @@ public void testAbsValueClippingPerElement() {
         double threshold = 3;
         NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer(
-                        new DenseLayer.Builder().nIn(10).nOut(20).updater(org.deeplearning4j.nn.conf.Updater.NONE)
+                        new DenseLayer.Builder().nIn(10).nOut(20).updater(new NoOp())
                                         .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                                         .gradientNormalizationThreshold(threshold).build())
                         .build();
@@ -122,7 +123,7 @@ public void testAbsValueClippingPerElement() {
         gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);
         Updater updater = UpdaterCreator.getUpdater(layer);
-        updater.update(layer, gradient, 0, 1);
+        updater.update(layer, gradient, 0, 0, 1);
         assertNotEquals(weightGradCopy, weightGrad);
         assertNotEquals(biasGradCopy, biasGrad);
@@ -158,7 +159,7 @@ public void testL2ClippingPerLayer() {
             //t=1: large -> clipping
             NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer(
-                            new DenseLayer.Builder().nIn(10).nOut(20).updater(org.deeplearning4j.nn.conf.Updater.NONE)
+                            new DenseLayer.Builder().nIn(10).nOut(20).updater(new NoOp())
                                             .gradientNormalization(GradientNormalization.ClipL2PerLayer)
                                             .gradientNormalizationThreshold(threshold).build())
                             .build();
@@ -185,7 +186,7 @@ public void testL2ClippingPerLayer() {
             assertTrue(layerGradL2 > threshold);
             Updater updater = UpdaterCreator.getUpdater(layer);
-            updater.update(layer, gradient, 0, 1);
+            updater.update(layer, gradient, 0, 0, 1);
             if (t == 0) {
                 //norm2 < threshold -> no change
@@ -213,7 +214,7 @@ public void testL2ClippingPerParamType() {
         double threshold = 3;
         NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer(
-                        new DenseLayer.Builder().nIn(10).nOut(20).updater(org.deeplearning4j.nn.conf.Updater.NONE)
+                        new DenseLayer.Builder().nIn(10).nOut(20).updater(new NoOp())
                                         .gradientNormalization(GradientNormalization.ClipL2PerParamType)
                                         .gradientNormalizationThreshold(threshold).build())
                         .build();
@@ -236,7 +237,7 @@ public void testL2ClippingPerParamType() {
         assertTrue(weightL2 < threshold);
         assertTrue(biasL2 > threshold);
-        updater.update(layer, gradient, 0, 1);
+        updater.update(layer, gradient, 0, 0, 1);
         assertEquals(weightGradCopy, weightGrad); //weight
norm2 < threshold -> no change assertNotEquals(biasGradCopy, biasGrad); //bias norm2 > threshold -> rescale diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java index 67dffe949b42..8495b491ba5d 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java @@ -65,10 +65,10 @@ public void testAdaDeltaUpdate() { double rho = 0.85; - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().rho(rho) + NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) - .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA) - .epsilon(Nd4j.EPS_THRESHOLD).build()) + .updater(new AdaDelta(rho, Nd4j.EPS_THRESHOLD)) + .build()) .build(); int numParams = conf.getLayer().initializer().numParams(conf); @@ -89,7 +89,7 @@ public void testAdaDeltaUpdate() { int count = 0; for (int i = 0; i < 2; i++) { - updater.update(layer, gradient, i, 1); + updater.update(layer, gradient, i, 0, 1); // calculations for one iteration / update @@ -121,7 +121,7 @@ public void testAdaDeltaUpdate() { msdx.put(key, msdxTmp); count++; } - assertEquals(rho, layer.layerConf().getRho(), 1e-4); + assertEquals(rho, ((AdaDelta)layer.layerConf().getIUpdater()).getRho(), 1e-4); } assertEquals(4, count); @@ -133,9 +133,8 @@ public void testAdaGradUpdater() { double epsilon = AdaGrad.DEFAULT_ADAGRAD_EPSILON; NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(lr) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) - .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build()) + new NeuralNetConfiguration.Builder().updater(new AdaGrad(lr)) + .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); int numParams = conf.getLayer().initializer().numParams(conf); @@ -154,7 +153,7 @@ public void testAdaGradUpdater() { gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wg); gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.BIAS_KEY, bg); - updater.update(layer, gradient, -1, 1); + updater.update(layer, gradient, -1, 0, 1); int count = 0; for (Map.Entry entry : gradientCopyPreUpdate.gradientForVariable().entrySet()) { @@ -163,7 +162,7 @@ public void testAdaGradUpdater() { assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); count++; } - assertEquals(lr, layer.layerConf().getLearningRate(), 1e-4); + assertEquals(lr, ((AdaGrad)layer.layerConf().getIUpdater()).getLearningRate(), 1e-4); assertEquals(2, count); } @@ -177,9 +176,8 @@ public void testAdamUpdater() { double beta2 = 0.888; double epsilon = Adam.DEFAULT_ADAM_EPSILON; - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr) - .iterations(iteration).adamMeanDecay(beta1).adamVarDecay(beta2).layer(new DenseLayer.Builder() - .nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.ADAM).build()) + NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Adam(lr, beta1, beta2, Adam.DEFAULT_ADAM_EPSILON)) + .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); int numParams = conf.getLayer().initializer().numParams(conf); @@ -191,7 +189,7 @@ public void testAdamUpdater() { INDArray updaterState = Nd4j.create(1, updaterStateSize); updater.setStateViewArray(layer, updaterState, true); - updater.update(layer, gradient, 
iteration, 1); + updater.update(layer, gradient, iteration, 0, 1); double beta1t = FastMath.pow(beta1, iteration + 1); double beta2t = FastMath.pow(beta2, iteration + 1); @@ -223,8 +221,8 @@ public void testAdamUpdater() { count++; } - assertEquals(beta1, layer.layerConf().getAdamMeanDecay(), 1e-4); - assertEquals(beta2, layer.layerConf().getAdamVarDecay(), 1e-4); + assertEquals(beta1, ((Adam)layer.layerConf().getIUpdater()).getBeta1(), 1e-4); + assertEquals(beta2, ((Adam)layer.layerConf().getIUpdater()).getBeta2(), 1e-4); assertEquals(2, count); } @@ -238,12 +236,12 @@ public void testNadamUpdater() { double epsilon = Nadam.DEFAULT_NADAM_EPSILON; NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(lr).iterations(iteration) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) - .updater(new Nadam.Builder().learningRate(lr).beta1(beta1) - .beta2(beta2).epsilon(epsilon).build()) - .build()) - .build(); + new NeuralNetConfiguration.Builder().iterations(iteration) + .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) + .updater(new Nadam.Builder().learningRate(lr).beta1(beta1) + .beta2(beta2).epsilon(epsilon).build()) + .build()) + .build(); int numParams = conf.getLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); @@ -258,7 +256,7 @@ public void testNadamUpdater() { /* * Making update for layer * */ - updater.update(layer, gradient, iteration, 1); + updater.update(layer, gradient, iteration, 0,1); double beta1t = FastMath.pow(beta1, iteration + 1); @@ -330,9 +328,9 @@ public void testAdaMaxUpdater() { double beta2 = 0.888; double epsilon = AdaMax.DEFAULT_ADAMAX_EPSILON; - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr) - .iterations(iteration).adamMeanDecay(beta1).adamVarDecay(beta2).layer(new DenseLayer.Builder() - .nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.ADAMAX).build()) + NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + .updater(new AdaMax(lr, beta1, beta2, AdaMax.DEFAULT_ADAMAX_EPSILON)) + .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); int numParams = conf.getLayer().initializer().numParams(conf); @@ -344,7 +342,7 @@ public void testAdaMaxUpdater() { INDArray updaterState = Nd4j.create(1, updaterStateSize); updater.setStateViewArray(layer, updaterState, true); - updater.update(layer, gradient, iteration, 1); + updater.update(layer, gradient, iteration, 0, 1); double beta1t = FastMath.pow(beta1, iteration + 1); double beta2t = FastMath.pow(beta2, iteration + 1); @@ -376,8 +374,8 @@ public void testAdaMaxUpdater() { count++; } - assertEquals(beta1, layer.layerConf().getAdamMeanDecay(), 1e-4); - assertEquals(beta2, layer.layerConf().getAdamVarDecay(), 1e-4); + assertEquals(beta1, ((AdaMax)layer.layerConf().getIUpdater()).getBeta1(), 1e-4); + assertEquals(beta2, ((AdaMax)layer.layerConf().getIUpdater()).getBeta2(), 1e-4); assertEquals(2, count); } @@ -387,9 +385,8 @@ public void testNestorovsUpdater() { double mu = 0.6; NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(lr).momentum(mu) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) - .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build()) + new NeuralNetConfiguration.Builder().updater(new Nesterovs(lr, mu)) + .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); int numParams = conf.getLayer().initializer().numParams(conf); @@ -408,7 +405,7 @@ public void testNestorovsUpdater() { 
gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wg); gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.BIAS_KEY, bg); - updater.update(layer, gradient, -1, 1); + updater.update(layer, gradient, -1, 0, 1); int count = 0; for (Map.Entry entry : gradientCopyPreUpdate.gradientForVariable().entrySet()) { @@ -422,7 +419,7 @@ public void testNestorovsUpdater() { count++; } - assertEquals(mu, layer.layerConf().getMomentum(), 1e-4); + assertEquals(mu, ((Nesterovs)layer.layerConf().getIUpdater()).getMomentum(), 1e-4); assertEquals(2, count); } @@ -435,9 +432,8 @@ public void testRMSPropUpdater() { NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(lr).rmsDecay(rmsDecay) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) - .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP).build()) + new NeuralNetConfiguration.Builder().updater(new RmsProp(lr,rmsDecay, RmsProp.DEFAULT_RMSPROP_EPSILON)) + .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); int numParams = conf.getLayer().initializer().numParams(conf); @@ -457,7 +453,7 @@ public void testRMSPropUpdater() { gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wg); gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.BIAS_KEY, bg); - updater.update(layer, gradient, -1, 1); + updater.update(layer, gradient, -1, 0, 1); double epsilon = 1e-8; @@ -475,7 +471,7 @@ public void testRMSPropUpdater() { assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); lastG.put(key, lastGTmp); } - assertEquals(rmsDecay, layer.layerConf().getRmsDecay(), 1e-4); + assertEquals(rmsDecay, ((RmsProp)layer.layerConf().getIUpdater()).getRmsDecay(), 1e-4); } @Test @@ -483,9 +479,8 @@ public void testSGDUpdater() { double lr = 0.05; NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(lr) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) + new NeuralNetConfiguration.Builder().updater(new Sgd(lr)) + .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); int numParams = conf.getLayer().initializer().numParams(conf); @@ -501,14 +496,14 @@ public void testSGDUpdater() { gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wg); gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.BIAS_KEY, bg); - updater.update(layer, gradient, -1, 1); + updater.update(layer, gradient, -1, 0, 1); for (Map.Entry entry : gradientCopyPreUpdate.gradientForVariable().entrySet()) { val = entry.getValue(); gradExpected = val.mul(lr); assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); } - assertEquals(lr, layer.layerConf().getLearningRate(), 1e-4); + assertEquals(lr, ((Sgd)layer.layerConf().getIUpdater()).getLearningRate(), 1e-4); } @@ -518,9 +513,8 @@ public void testNoOpUpdater() { double lr = 0.5; NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(lr) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) - .updater(org.deeplearning4j.nn.conf.Updater.NONE).build()) + new NeuralNetConfiguration.Builder().updater(new NoOp()) + .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); int numParams = conf.getLayer().initializer().numParams(conf); @@ -540,7 +534,7 @@ public void testNoOpUpdater() { gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, wg); gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, bg); - updater.update(layer, gradient, -1, 1); + 
updater.update(layer, gradient, -1, 0, 1); INDArray weightGradActual = gradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY); INDArray biasGradActual = gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY); @@ -555,15 +549,14 @@ public void testMultiLayerUpdater() throws Exception { Nd4j.getRandom().setSeed(12345L); double lr = 0.03; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(0.6).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(5) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + .layer(0, new DenseLayer.Builder().nIn(4).nOut(5).updater(new Sgd(lr)).build()) .layer(1, new DenseLayer.Builder().nIn(5).nOut(6) - .updater(org.deeplearning4j.nn.conf.Updater.NONE).build()) + .updater(new NoOp()).build()) .layer(2, new DenseLayer.Builder().nIn(6).nOut(7) - .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build()) + .updater(new AdaGrad(lr)).build()) .layer(3, new OutputLayer.Builder().nIn(7).nOut(8) - .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS) + .updater(new Nesterovs(0.6)) .activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE) .build()) .build(); @@ -636,13 +629,13 @@ public void testMultiLayerUpdater() throws Exception { layerGradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wGrad.dup()); layerGradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, bGrad.dup()); - uArr[j].getConfig().applySchedules(0, net.getLayer(j).conf().getLearningRateByParam("W")); +// uArr[j].getConfig().applySchedules(0, net.getLayer(j).conf().getLearningRateByParam("W")); for (String s : layerGradient.gradientForVariable().keySet()) { expectedGradient.put(j + "_" + s, layerGradient.getGradientFor(s)); } } - updater.update(net, gradient, i, 1); + updater.update(net, gradient, i, 0, 1); assertEquals(gradient.gradientForVariable(), expectedGradient); } } @@ -657,11 +650,11 @@ public void testSetGetUpdater() { int nIn = 4; int nOut = 8; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(0.6).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(lr,0.6)).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5) .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) .layer(1, new DenseLayer.Builder().nIn(5).nOut(6) - .updater(org.deeplearning4j.nn.conf.Updater.NONE).build()) + .updater(new NoOp()).build()) .layer(2, new DenseLayer.Builder().nIn(6).nOut(7) .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build()) .layer(3, new OutputLayer.Builder().nIn(7).nOut(nOut) @@ -688,11 +681,11 @@ public void testSetGetUpdater2() { int nIn = 4; int nOut = 8; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(0.6).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(lr,0.6)).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5) .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) .layer(1, new DenseLayer.Builder().nIn(5).nOut(6) - .updater(org.deeplearning4j.nn.conf.Updater.NONE).build()) + .updater(new NoOp()).build()) .layer(2, new DenseLayer.Builder().nIn(6).nOut(7) .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build()) .layer(3, new OutputLayer.Builder().nIn(7).nOut(nOut) @@ -707,67 +700,6 @@ public void testSetGetUpdater2() { assertTrue(newUpdater == net.getUpdater()); //Should be identical object } - - @Test - public void 
testEpsilon() { - //Test epsilon setting - adagrad - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).epsilon(0.123).build()) - .layer(2, new OutputLayer.Builder().nIn(2).nOut(2).epsilon(0.456).build()).build(); - - assertEquals(1e-6, ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(0).getLayer()).getEpsilon(), - 0.0); - assertEquals(0.123, ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(1).getLayer()).getEpsilon(), - 0.0); - assertEquals(0.456, ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(2).getLayer()).getEpsilon(), - 0.0); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - // net.fit(Nd4j.create(1,2), Nd4j.create(1,2)); - MultiLayerUpdater updater = (MultiLayerUpdater) net.getUpdater(); - List l = updater.getUpdaterBlocks(); - - AdaGrad adaGrad = (AdaGrad) l.get(0).getGradientUpdater().getConfig(); - assertEquals(1e-6, adaGrad.getEpsilon(), 0.0); - - AdaGrad adaGrad1 = (AdaGrad) l.get(1).getGradientUpdater().getConfig(); - assertEquals(0.123, adaGrad1.getEpsilon(), 0.0); - - AdaGrad adaGrad2 = (AdaGrad) l.get(2).getGradientUpdater().getConfig(); - assertEquals(0.456, adaGrad2.getEpsilon(), 0.0); - - - //Test epsilon setting - adadelta - conf = new NeuralNetConfiguration.Builder().updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).epsilon(0.123).build()) - .layer(2, new OutputLayer.Builder().nIn(2).nOut(2).epsilon(0.456).build()).build(); - - assertEquals(1e-6, ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(0).getLayer()).getEpsilon(), - 0.0); - assertEquals(0.123, ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(1).getLayer()).getEpsilon(), - 0.0); - assertEquals(0.456, ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(2).getLayer()).getEpsilon(), - 0.0); - - net = new MultiLayerNetwork(conf); - net.init(); - updater = (MultiLayerUpdater) net.getUpdater(); - l = updater.getUpdaterBlocks(); - - AdaDelta adaDelta = (AdaDelta) l.get(0).getGradientUpdater().getConfig(); - assertEquals(1e-6, adaDelta.getEpsilon(), 0.0); - - AdaDelta adaDelta1 = (AdaDelta) l.get(1).getGradientUpdater().getConfig(); - assertEquals(0.123, adaDelta1.getEpsilon(), 0.0); - - AdaDelta adaDelta2 = (AdaDelta) l.get(2).getGradientUpdater().getConfig(); - assertEquals(0.456, adaDelta2.getEpsilon(), 0.0); - } - @Test public void testPretrain() { @@ -786,8 +718,7 @@ public void testPretrain() { gradient.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).seed(42) - .updater(org.deeplearning4j.nn.conf.Updater.SGD) + NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(lr)).seed(42) .layer(new org.deeplearning4j.nn.conf.layers.RBM.Builder() .lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY) .activation(Activation.IDENTITY).nIn(nIn).nOut(nOut).build()) @@ -808,14 +739,14 @@ public void testPretrain() { gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.BIAS_KEY, bg); gradientCopyPreUpdate.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbg); - updater.update(layer, gradient, -1, 1); + updater.update(layer, gradient, -1, 0, 1); for (Map.Entry entry : 
gradientCopyPreUpdate.gradientForVariable().entrySet()) { val = entry.getValue(); gradExpected = val.mul(lr); assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); } - assertEquals(lr, layer.layerConf().getLearningRate(), 1e-4); + assertEquals(lr, ((Sgd)layer.layerConf().getIUpdater()).getLearningRate(), 1e-4); //Test with pretrain == false @@ -845,7 +776,7 @@ public void testPretrain() { layer.setBackpropGradientsViewArray(gradients); updater = UpdaterCreator.getUpdater(layer); - updater.update(layer, gradient, -1, 1); + updater.update(layer, gradient, -1, 0, 1); for (Map.Entry entry : gradientCopyPreUpdate.gradientForVariable().entrySet()) { // System.out.println(entry.getKey()); @@ -859,49 +790,7 @@ public void testPretrain() { // System.out.println(gradExpected + "\t" + gradient.getGradientFor(entry.getKey())); assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); } - assertEquals(lr, layer.layerConf().getLearningRate(), 1e-4); - } - - @Test - public void testEpsilonAllUpdaters() { - - double e = 7e-2; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().epsilon(e).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2) - .updater(org.deeplearning4j.nn.conf.Updater.ADAM).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2) - .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP).build()) - .layer(2, new DenseLayer.Builder().nIn(2).nOut(2) - .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).build()) - .layer(3, new DenseLayer.Builder().nIn(2).nOut(2) - .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build()) - .layer(4, new OutputLayer.Builder().nIn(2).nOut(2) - .updater(org.deeplearning4j.nn.conf.Updater.ADAMAX).build()) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - net.fit(Nd4j.create(1, 2), Nd4j.create(1, 2)); - - - MultiLayerUpdater updater = (MultiLayerUpdater) net.getUpdater(); - List l = updater.getUpdaterBlocks(); - - Adam adam = (Adam) l.get(0).getGradientUpdater().getConfig(); //u0.updaterForVariable.get("W"); - assertEquals(e, adam.getEpsilon(), 0.0); - - RmsProp rmsProp = (RmsProp) l.get(1).getGradientUpdater().getConfig(); //u1.updaterForVariable.get("W"); - assertEquals(e, rmsProp.getEpsilon(), 0.0); - - AdaDelta adaDelta = (AdaDelta) l.get(2).getGradientUpdater().getConfig(); //u2.updaterForVariable.get("W"); - assertEquals(e, adaDelta.getEpsilon(), 0.0); - - AdaGrad adaGrad = (AdaGrad) l.get(3).getGradientUpdater().getConfig(); //u3.updaterForVariable.get("W"); - assertEquals(e, adaGrad.getEpsilon(), 0.0); - - AdaMax adaMax = (AdaMax) l.get(4).getGradientUpdater().getConfig(); //u3.updaterForVariable.get("W"); - assertEquals(e, adaMax.getEpsilon(), 0.0); + assertEquals(lr, ((Sgd)layer.layerConf().getIUpdater()).getLearningRate(), 1e-4); } @Test @@ -910,18 +799,18 @@ public void testUpdaterBlockMlnAndCG() { List blocks; if (i == 0) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.5).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).name("l0") - .updater(org.deeplearning4j.nn.conf.Updater.ADAM).build()) + .updater(new Adam(0.5)).build()) .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).name("l1") - .updater(org.deeplearning4j.nn.conf.Updater.ADAM).biasLearningRate(0.25) + .updater(new Adam(0.5)).biasUpdater(new Adam(0.25)) .build()) .layer(2, new DenseLayer.Builder().nIn(10).nOut(10).name("l2") - 
.updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).build()) + .updater(new AdaDelta()).build()) .layer(3, new DenseLayer.Builder().nIn(10).nOut(10).name("l3") - .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build()) + .updater(new AdaGrad(0.5)).build()) .layer(4, new OutputLayer.Builder().nIn(10).nOut(10).name("l4") - .updater(org.deeplearning4j.nn.conf.Updater.ADAMAX).build()) + .updater(new AdaMax(0.5)).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -930,19 +819,19 @@ public void testUpdaterBlockMlnAndCG() { MultiLayerUpdater u = (MultiLayerUpdater) net.getUpdater(); blocks = u.getUpdaterBlocks(); } else { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.5) + ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() .graphBuilder().addInputs("in") .addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10) - .updater(org.deeplearning4j.nn.conf.Updater.ADAM).build(), "in") + .updater(new Adam(0.5)).build(), "in") .addLayer("l1", new DenseLayer.Builder().nIn(10).nOut(10) - .updater(org.deeplearning4j.nn.conf.Updater.ADAM).biasLearningRate(0.25) + .updater(new Adam(0.5)).biasUpdater(new Adam(0.25)) .build(), "l0") .addLayer("l2", new DenseLayer.Builder().nIn(10).nOut(10) - .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).build(), "l1") + .updater(new AdaDelta()).build(), "l1") .addLayer("l3", new DenseLayer.Builder().nIn(10).nOut(10) - .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build(), "l2") + .updater(new AdaGrad(0.5)).build(), "l2") .addLayer("l4", new OutputLayer.Builder().nIn(10).nOut(10) - .updater(org.deeplearning4j.nn.conf.Updater.ADAMAX).build(), "l3") + .updater(new AdaMax(0.5)).build(), "l3") .setOutputs("l4").build(); ComputationGraph net = new ComputationGraph(conf); @@ -1043,8 +932,7 @@ public void testUpdaterBlockVae() { List blocks; MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().learningRate(0.5) - .updater(org.deeplearning4j.nn.conf.Updater.ADAM).list() + new NeuralNetConfiguration.Builder().updater(new Adam(0.5)).list() .layer(0, new VariationalAutoencoder.Builder().nIn(8).nOut(12) .encoderLayerSizes(10, 11).decoderLayerSizes(13, 14).build()) .build(); @@ -1078,86 +966,4 @@ public void testUpdaterBlockVae() { } assertEquals(expParams, actParams); } - - - @Test - public void testUpdaterConfigDeprecatedMethods() { - //.momentum(), .epsilon() etc - these are now deprecated, but we still want them to work as expected - // until they are actually removed - - double lr = 0.75; - double eps = 0.65; - double adamMean = 0.1; - double adamVar = 0.2; - double momentum = 0.3; - Map momentumSchedule = new HashMap<>(); - momentumSchedule.put(0, 0.35); - momentumSchedule.put(10, 0.34); - double rmsDecay = 0.4; - - for (boolean useEnum : new boolean[] {true, false}) { - NeuralNetConfiguration.ListBuilder listBuilder = new NeuralNetConfiguration.Builder() - //Multiple updaters - .learningRate(lr).epsilon(eps) - //Adam - .adamMeanDecay(adamMean).adamVarDecay(adamVar) - //Momentum - .momentum(momentum).momentumAfter(momentumSchedule) - //RMSProp - .rmsDecay(rmsDecay).list(); - if (useEnum) { - listBuilder.layer(0, - new DenseLayer.Builder().nIn(10).nOut(10) - .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) - .layer(1, new DenseLayer.Builder().nIn(10).nOut(10) - .updater(org.deeplearning4j.nn.conf.Updater.ADAM).build()) - .layer(2, new DenseLayer.Builder().nIn(10).nOut(10) - .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).build()) - .layer(3, new 
DenseLayer.Builder().nIn(10).nOut(10) - .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build()) - .layer(4, new DenseLayer.Builder().nIn(10).nOut(10) - .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build()) - .layer(5, new DenseLayer.Builder().nIn(10).nOut(10) - .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP).build()); - } else { - listBuilder.layer(0, new DenseLayer.Builder().nIn(10).nOut(10).updater(new Sgd()).build()) - .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).updater(new Adam()).build()) - .layer(2, new DenseLayer.Builder().nIn(10).nOut(10).updater(new AdaDelta()).build()) - .layer(3, new DenseLayer.Builder().nIn(10).nOut(10).updater(new Nesterovs()).build()) - .layer(4, new DenseLayer.Builder().nIn(10).nOut(10).updater(new AdaGrad()).build()) - .layer(5, new DenseLayer.Builder().nIn(10).nOut(10).updater(new RmsProp()).build()); - } - - - MultiLayerConfiguration conf = listBuilder.build(); - - Sgd sgd = (Sgd) ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(0).getLayer()).getIUpdater(); - assertEquals(lr, sgd.getLearningRate(), 1e-6); - - Adam adam = (Adam) ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(1).getLayer()).getIUpdater(); - assertEquals(lr, adam.getLearningRate(), 1e-6); - assertEquals(eps, adam.getEpsilon(), 1e-6); - assertEquals(adamMean, adam.getBeta1(), 1e-6); - assertEquals(adamVar, adam.getBeta2(), 1e-6); - - //Adadelta: no params - - Nesterovs nesterovs = (Nesterovs) ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(3).getLayer()) - .getIUpdater(); - assertEquals(lr, nesterovs.getLearningRate(), 1e-6); - assertEquals(momentum, nesterovs.getMomentum(), 1e-6); - assertEquals(momentumSchedule, nesterovs.getMomentumSchedule()); - - AdaGrad adagrad = (AdaGrad) ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(4).getLayer()) - .getIUpdater(); - assertEquals(lr, adagrad.getLearningRate(), 1e-6); - assertEquals(eps, adagrad.getEpsilon(), 1e-6); - - RmsProp rmsProp = (RmsProp) ((org.deeplearning4j.nn.conf.layers.BaseLayer) conf.getConf(5).getLayer()) - .getIUpdater(); - assertEquals(lr, rmsProp.getLearningRate(), 1e-6); - assertEquals(rmsDecay, rmsProp.getRmsDecay(), 1e-6); - assertEquals(eps, rmsProp.getEpsilon(), 1e-6); - } - } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomGradientUpdater.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomGradientUpdater.java index 7a7c00594d91..be1f1cd58129 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomGradientUpdater.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomGradientUpdater.java @@ -23,7 +23,7 @@ public void setStateViewArray(INDArray viewArray, int[] gradientShape, char grad } @Override - public void applyUpdater(INDArray gradient, int iteration) { + public void applyUpdater(INDArray gradient, int iteration, int epoch) { gradient.muli(config.getLearningRate()); } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomIUpdater.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomIUpdater.java index 6e9519c8010e..86de505e40b6 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomIUpdater.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/CustomIUpdater.java @@ -26,11 +26,6 @@ public long stateSize(long numParams) { return 0; } - @Override - public void 
applySchedules(int iteration, double newLearningRate) { - this.learningRate = newLearningRate; - } - @Override public GradientUpdater instantiate(INDArray viewArray, boolean initializeViewArray) { if (viewArray != null) { diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java index f0b8a28557c4..83496942360d 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java @@ -2,7 +2,6 @@ import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -30,16 +29,16 @@ public void testCustomUpdater() { double lr = 0.03; Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(12345).learningRate(lr) - .activation(Activation.TANH).updater(new CustomIUpdater()) //Specify custom IUpdater + MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(12345) + .activation(Activation.TANH).updater(new CustomIUpdater(lr)) //Specify custom IUpdater .list().layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new OutputLayer.Builder().nIn(10).nOut(10) .lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345).learningRate(lr) - .activation(Activation.TANH).updater(Updater.SGD).list() + MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345) + .activation(Activation.TANH).updater(new Sgd(lr)).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder() .nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java index 7a533544417a..7ce2185c594e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java @@ -4,7 +4,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -21,6 +20,7 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.Collections; @@ -231,8 +231,7 @@ public void testBackTrackLineHessian() { private static MultiLayerConfiguration getIrisMultiLayerConfig(Activation activationFunction, int iterations, OptimizationAlgorithm optimizer) { MultiLayerConfiguration conf = new 
NeuralNetConfiguration.Builder().optimizationAlgo(optimizer) - .iterations(iterations).miniBatch(false).momentum(0.9).learningRate(0.01) - .updater(Updater.NESTEROVS).seed(12345L).list() + .iterations(iterations).miniBatch(false).updater(new Nesterovs(0.9)).seed(12345L).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) .activation(activationFunction).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java index 6e297143eef1..26ea59d43f64 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java @@ -1,6 +1,5 @@ package org.deeplearning4j.optimize.solver; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; @@ -9,7 +8,6 @@ import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.conf.layers.RBM; @@ -36,7 +34,10 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.conditions.Condition; +import org.nd4j.linalg.learning.config.AdaGrad; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; import java.util.Collection; @@ -115,15 +116,13 @@ public void testOptimizersMLP() { private static MultiLayerConfiguration getMLPConfigIris(OptimizationAlgorithm oa, int nIterations) { MultiLayerConfiguration c = new NeuralNetConfiguration.Builder().optimizationAlgo(oa).iterations(nIterations) - .learningRate(1e-1).seed(12345L) + .updater(new AdaGrad(1e-1)).seed(12345L) .list().layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) - .updater(Updater.ADAGRAD).activation( - Activation.RELU) + .activation(Activation.RELU) .build()) .layer(1, new OutputLayer.Builder(LossFunction.MCXENT).nIn(3).nOut(3) - .weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD) - .activation(Activation.SOFTMAX).build()) + .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) .backprop(true).pretrain(false).build(); return c; @@ -188,8 +187,8 @@ public void testSphereFnOptHelper(OptimizationAlgorithm oa, int numLineSearchIte + nDimensions); NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().maxNumLineSearchIterations(numLineSearchIter) - .iterations(100).learningRate(1e-2) - .layer(new RBM.Builder().nIn(1).nOut(1).updater(Updater.SGD).build()).build(); + .iterations(100).updater(new Sgd(1e-2)) + .layer(new RBM.Builder().nIn(1).nOut(1).build()).build(); conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here Random rng = new DefaultRandom(12345L); @@ -278,8 +277,8 @@ private static void testSphereFnMultipleStepsHelper(OptimizationAlgorithm oa, in org.nd4j.linalg.api.rng.distribution.Distribution dist = new 
org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10); NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() - .maxNumLineSearchIterations(maxNumLineSearchIter).iterations(i).learningRate(0.1) - .layer(new DenseLayer.Builder().nIn(1).nOut(1).updater(Updater.SGD).build()).build(); + .maxNumLineSearchIterations(maxNumLineSearchIter).iterations(i).updater(new Sgd(0.1)) + .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build(); conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here Model m = new SphereFunctionModel(100, dist, conf); @@ -348,11 +347,6 @@ public void setBackpropGradientsViewArray(INDArray gradients) { throw new UnsupportedOperationException(); } - @Override - public void applyLearningRateScoreDecay() { - - } - @Override public void setCacheMode(CacheMode mode) { throw new UnsupportedOperationException(); @@ -377,6 +371,11 @@ public void setInput(INDArray input) { public boolean isPretrainLayer() { return false; } + + @Override + public void clearNoiseWeightParams() { + + } } @@ -412,8 +411,8 @@ private static void testRastriginFnMultipleStepsHelper(OptimizationAlgorithm oa, for (int i = 0; i <= nOptIter; i++) { NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() .maxNumLineSearchIterations(maxNumLineSearchIter).iterations(i).miniBatch(false) - .learningRate(1e-2) - .layer(new DenseLayer.Builder().nIn(1).nOut(1).updater(Updater.ADAGRAD).build()).build(); + .updater(new AdaGrad(1e-2)) + .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build(); conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here Model m = new RastriginFunctionModel(10, conf); @@ -536,11 +535,6 @@ public void setBackpropGradientsViewArray(INDArray gradients) { throw new UnsupportedOperationException(); } - @Override - public void applyLearningRateScoreDecay() { - - } - @Override public void setCacheMode(CacheMode mode) { @@ -566,6 +560,11 @@ public void setInput(INDArray input) { public boolean isPretrainLayer() { return false; } + + @Override + public void clearNoiseWeightParams() { + + } } @@ -595,8 +594,9 @@ private static void testRosenbrockFnMultipleStepsHelper(OptimizationAlgorithm oa for (int i = 0; i <= nOptIter; i++) { NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() .maxNumLineSearchIterations(maxNumLineSearchIter).iterations(i) + .updater(new Sgd(1e-1)) .stepFunction(new org.deeplearning4j.nn.conf.stepfunctions.NegativeDefaultStepFunction()) - .learningRate(1e-1).layer(new RBM.Builder().nIn(1).nOut(1).updater(Updater.SGD).build()) + .layer(new RBM.Builder().nIn(1).nOut(1).build()) .build(); conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here @@ -733,11 +733,6 @@ public void setBackpropGradientsViewArray(INDArray gradients) { throw new UnsupportedOperationException(); } - @Override - public void applyLearningRateScoreDecay() { - - } - @Override public void setCacheMode(CacheMode mode) { @@ -763,6 +758,11 @@ public void setInput(INDArray input) { public boolean isPretrainLayer() { return false; } + + @Override + public void clearNoiseWeightParams() { + + } } @@ -992,36 +992,11 @@ public Type type() { throw new UnsupportedOperationException(); } - @Override - public Gradient error(INDArray input) { - throw new UnsupportedOperationException(); - } - - @Override - public INDArray derivativeActivation(INDArray input) { - throw new UnsupportedOperationException(); - } - - @Override - public Gradient 
calcGradient(Gradient layerError, INDArray indArray) { - throw new UnsupportedOperationException(); - } - @Override public Pair backpropGradient(INDArray epsilon) { throw new UnsupportedOperationException(); } - @Override - public void merge(Layer layer, int batchSize) { - throw new UnsupportedOperationException(); - } - - @Override - public INDArray activationMean() { - throw new UnsupportedOperationException(); - } - @Override public INDArray preOutput(INDArray x) { throw new UnsupportedOperationException(); @@ -1093,5 +1068,25 @@ public INDArray getGradientsViewArray() { public void applyConstraints(int iteration, int epoch) { } + + @Override + public int getIterationCount() { + return 0; + } + + @Override + public int getEpochCount() { + return 0; + } + + @Override + public void setIterationCount(int iterationCount) { + + } + + @Override + public void setEpochCount(int epochCount) { + + } } } diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestParamAndGradientIterationListener.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestParamAndGradientIterationListener.java index 3f70b01511ac..399563ec7b3e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestParamAndGradientIterationListener.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/optimizer/listener/TestParamAndGradientIterationListener.java @@ -12,6 +12,7 @@ import org.deeplearning4j.optimize.listeners.ParamAndGradientIterationListener; import org.junit.Test; import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.io.File; @@ -24,7 +25,7 @@ public void test() { IrisDataSetIterator iter = new IrisDataSetIterator(30, 150); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1e-5) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(1e-5)) .iterations(1).list().layer(0, new DenseLayer.Builder().nIn(4).nOut(20).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/ParallelExistingMiniBatchDataSetIteratorTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/ParallelExistingMiniBatchDataSetIteratorTest.java index 8d55d3f41093..b099c3ebb5ec 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/ParallelExistingMiniBatchDataSetIteratorTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/ParallelExistingMiniBatchDataSetIteratorTest.java @@ -2,12 +2,12 @@ import lombok.extern.slf4j.Slf4j; import org.datavec.api.util.ClassPathResource; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.datasets.iterator.callbacks.DataSetDeserializer; import org.deeplearning4j.datasets.iterator.parallel.FileSplitParallelDataSetIterator; import org.junit.Before; import org.junit.Test; import org.nd4j.linalg.dataset.DataSet; +import org.nd4j.linalg.primitives.Pair; import java.io.File; import java.util.ArrayList; diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java index ef7c208fe495..4a097140671b 100644 --- 
a/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java @@ -1,10 +1,8 @@ package org.deeplearning4j.parallelism; import org.deeplearning4j.nn.api.Model; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -16,6 +14,7 @@ import org.junit.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.List; @@ -42,15 +41,10 @@ public void testModelInitialParamsEquality1() throws Exception { @Override public void run() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(119).iterations(1) // Training iterations as above - .regularization(true).l2(0.0005) - /* - Uncomment the following for learning decay and bias - */ - .learningRate(.01)//.biasLearningRate(0.02) + .l2(0.0005) //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) .weightInit(WeightInit.XAVIER) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(Updater.NESTEROVS).momentum(0.9) + .updater(new Nesterovs(0.01, 0.9)) .trainingWorkspaceMode(WorkspaceMode.SINGLE).list() .layer(0, new ConvolutionLayer.Builder(5, 5) //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java index 34fbd741e194..d1f4b3d8c00e 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java @@ -2,8 +2,8 @@ import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; @@ -52,9 +52,8 @@ public void regressionTestMLP1() throws Exception { assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); assertEquals(WeightInit.XAVIER, l0.getWeightInit()); - assertEquals(Updater.NESTEROVS, l0.getUpdater()); - assertEquals(0.9, l0.getMomentum(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); + assertEquals(new Nesterovs(0.15, 0.9), l0.getIUpdater()); + assertEquals(0.15, ((Nesterovs)l0.getIUpdater()).getLearningRate(), 1e-6); OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer(); assertEquals("softmax", l1.getActivationFn().toString()); @@ -63,9 +62,9 @@ public void regressionTestMLP1() throws Exception { assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); assertEquals(WeightInit.XAVIER, l1.getWeightInit()); - assertEquals(Updater.NESTEROVS, l1.getUpdater()); - assertEquals(0.9, l1.getMomentum(), 1e-6); - assertEquals(0.15, 
l1.getLearningRate(), 1e-6); + assertEquals(new Nesterovs(0.15, 0.9), l1.getIUpdater()); + assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6); + assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6); int numParams = net.numParams(); assertEquals(Nd4j.linspace(1, numParams, numParams), net.params()); @@ -93,10 +92,9 @@ public void regressionTestMLP2() throws Exception { assertEquals(4, l0.getNOut()); assertEquals(WeightInit.DISTRIBUTION, l0.getWeightInit()); assertEquals(new NormalDistribution(0.1, 1.2), l0.getDist()); - assertEquals(Updater.RMSPROP, l0.getUpdater()); - assertEquals(0.96, l0.getRmsDecay(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); - assertEquals(0.6, l0.getDropOut(), 1e-6); + assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); + assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); + assertEquals(new Dropout(0.6), l0.getIDropout()); assertEquals(0.1, l0.getL1(), 1e-6); assertEquals(0.2, l0.getL2(), 1e-6); @@ -108,10 +106,9 @@ public void regressionTestMLP2() throws Exception { assertEquals(5, l1.getNOut()); assertEquals(WeightInit.DISTRIBUTION, l0.getWeightInit()); assertEquals(new NormalDistribution(0.1, 1.2), l0.getDist()); - assertEquals(Updater.RMSPROP, l0.getUpdater()); - assertEquals(0.96, l1.getRmsDecay(), 1e-6); - assertEquals(0.15, l1.getLearningRate(), 1e-6); - assertEquals(0.6, l1.getDropOut(), 1e-6); + assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l1.getIUpdater()); + assertEquals(0.15, ((RmsProp)l1.getIUpdater()).getLearningRate(), 1e-6); + assertEquals(new Dropout(0.6), l1.getIDropout()); assertEquals(0.1, l1.getL1(), 1e-6); assertEquals(0.2, l1.getL2(), 1e-6); @@ -140,9 +137,8 @@ public void regressionTestCNN1() throws Exception { assertEquals(3, l0.getNIn()); assertEquals(3, l0.getNOut()); assertEquals(WeightInit.RELU, l0.getWeightInit()); - assertEquals(Updater.RMSPROP, l0.getUpdater()); - assertEquals(0.96, l0.getRmsDecay(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); + assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); + assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); assertArrayEquals(new int[] {2, 2}, l0.getKernelSize()); assertArrayEquals(new int[] {1, 1}, l0.getStride()); assertArrayEquals(new int[] {0, 0}, l0.getPadding()); @@ -162,9 +158,8 @@ public void regressionTestCNN1() throws Exception { assertEquals(26 * 26 * 3, l2.getNIn()); assertEquals(5, l2.getNOut()); assertEquals(WeightInit.RELU, l0.getWeightInit()); - assertEquals(Updater.RMSPROP, l0.getUpdater()); - assertEquals(0.96, l0.getRmsDecay(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); + assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); + assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); int numParams = net.numParams(); assertEquals(Nd4j.linspace(1, numParams, numParams), net.params()); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java index 7bf27d3d0f75..bcd46cf56ca0 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java @@ -1,7 +1,11 @@ package org.deeplearning4j.regressiontest; -import org.deeplearning4j.nn.conf.*; 
+import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.GradientNormalization; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.graph.LayerVertex; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; @@ -53,9 +57,8 @@ public void regressionTestMLP1() throws Exception { assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); assertEquals(WeightInit.XAVIER, l0.getWeightInit()); - assertEquals(Updater.NESTEROVS, l0.getUpdater()); - assertEquals(0.9, l0.getMomentum(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); + assertEquals(new Nesterovs(0.15, 0.9), l0.getIUpdater()); + assertEquals(0.15, ((Nesterovs)l0.getIUpdater()).getLearningRate(), 1e-6); OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer(); assertEquals("softmax", l1.getActivationFn().toString()); @@ -64,9 +67,9 @@ public void regressionTestMLP1() throws Exception { assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); assertEquals(WeightInit.XAVIER, l1.getWeightInit()); - assertEquals(Updater.NESTEROVS, l1.getUpdater()); - assertEquals(0.9, l1.getMomentum(), 1e-6); - assertEquals(0.15, l1.getLearningRate(), 1e-6); + assertEquals(new Nesterovs(0.15, 0.9), l1.getIUpdater()); + assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6); + assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6); int numParams = net.numParams(); assertEquals(Nd4j.linspace(1, numParams, numParams), net.params()); @@ -94,10 +97,9 @@ public void regressionTestMLP2() throws Exception { assertEquals(4, l0.getNOut()); assertEquals(WeightInit.DISTRIBUTION, l0.getWeightInit()); assertEquals(new NormalDistribution(0.1, 1.2), l0.getDist()); - assertEquals(Updater.RMSPROP, l0.getUpdater()); - assertEquals(0.96, l0.getRmsDecay(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); - assertEquals(0.6, l0.getDropOut(), 1e-6); + assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); + assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); + assertEquals(new Dropout(0.6), l0.getIDropout()); assertEquals(0.1, l0.getL1(), 1e-6); assertEquals(0.2, l0.getL2(), 1e-6); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l0.getGradientNormalization()); @@ -111,10 +113,9 @@ public void regressionTestMLP2() throws Exception { assertEquals(5, l1.getNOut()); assertEquals(WeightInit.DISTRIBUTION, l0.getWeightInit()); assertEquals(new NormalDistribution(0.1, 1.2), l0.getDist()); - assertEquals(Updater.RMSPROP, l0.getUpdater()); - assertEquals(0.96, l1.getRmsDecay(), 1e-6); - assertEquals(0.15, l1.getLearningRate(), 1e-6); - assertEquals(0.6, l1.getDropOut(), 1e-6); + assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l1.getIUpdater()); + assertEquals(0.15, ((RmsProp)l1.getIUpdater()).getLearningRate(), 1e-6); + assertEquals(new Dropout(0.6), l1.getIDropout()); assertEquals(0.1, l1.getL1(), 1e-6); assertEquals(0.2, l1.getL2(), 1e-6); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l1.getGradientNormalization()); @@ -145,9 +146,8 @@ public void regressionTestCNN1() throws Exception { assertEquals(3, l0.getNIn()); assertEquals(3, l0.getNOut()); assertEquals(WeightInit.RELU, l0.getWeightInit()); - 
assertEquals(Updater.RMSPROP, l0.getUpdater()); - assertEquals(0.96, l0.getRmsDecay(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); + assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); + assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); assertArrayEquals(new int[] {2, 2}, l0.getKernelSize()); assertArrayEquals(new int[] {1, 1}, l0.getStride()); assertArrayEquals(new int[] {0, 0}, l0.getPadding()); @@ -167,9 +167,8 @@ public void regressionTestCNN1() throws Exception { assertEquals(26 * 26 * 3, l2.getNIn()); assertEquals(5, l2.getNOut()); assertEquals(WeightInit.RELU, l0.getWeightInit()); - assertEquals(Updater.RMSPROP, l0.getUpdater()); - assertEquals(0.96, l0.getRmsDecay(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); + assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); + assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java index 34b6d89e1fcd..b2a361cec589 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java @@ -1,7 +1,11 @@ package org.deeplearning4j.regressiontest; -import org.deeplearning4j.nn.conf.*; +import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.GradientNormalization; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.graph.LayerVertex; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; @@ -54,9 +58,8 @@ public void regressionTestMLP1() throws Exception { assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); assertEquals(WeightInit.XAVIER, l0.getWeightInit()); - assertEquals(Updater.NESTEROVS, l0.getUpdater()); - assertEquals(0.9, l0.getMomentum(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); + assertEquals(new Nesterovs(0.15, 0.9), l0.getIUpdater()); + assertEquals(0.15, ((Nesterovs)l0.getIUpdater()).getLearningRate(), 1e-6); OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer(); assertEquals("softmax", l1.getActivationFn().toString()); @@ -65,9 +68,9 @@ public void regressionTestMLP1() throws Exception { assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); assertEquals(WeightInit.XAVIER, l1.getWeightInit()); - assertEquals(Updater.NESTEROVS, l1.getUpdater()); - assertEquals(0.9, l1.getMomentum(), 1e-6); - assertEquals(0.15, l1.getLearningRate(), 1e-6); + assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6); + assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6); + assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6); int numParams = net.numParams(); assertEquals(Nd4j.linspace(1, numParams, numParams), net.params()); @@ -95,10 +98,9 @@ public void regressionTestMLP2() throws Exception { assertEquals(4, l0.getNOut()); assertEquals(WeightInit.DISTRIBUTION, l0.getWeightInit()); assertEquals(new 
NormalDistribution(0.1, 1.2), l0.getDist()); - assertEquals(Updater.RMSPROP, l0.getUpdater()); - assertEquals(0.96, l0.getRmsDecay(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); - assertEquals(0.6, l0.getDropOut(), 1e-6); + assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); + assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); + assertEquals(new Dropout(0.6), l0.getIDropout()); assertEquals(0.1, l0.getL1(), 1e-6); assertEquals(0.2, l0.getL2(), 1e-6); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l0.getGradientNormalization()); @@ -112,10 +114,9 @@ public void regressionTestMLP2() throws Exception { assertEquals(5, l1.getNOut()); assertEquals(WeightInit.DISTRIBUTION, l0.getWeightInit()); assertEquals(new NormalDistribution(0.1, 1.2), l0.getDist()); - assertEquals(Updater.RMSPROP, l0.getUpdater()); - assertEquals(0.96, l1.getRmsDecay(), 1e-6); - assertEquals(0.15, l1.getLearningRate(), 1e-6); - assertEquals(0.6, l1.getDropOut(), 1e-6); + assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l1.getIUpdater()); + assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); + assertEquals(new Dropout(0.6), l1.getIDropout()); assertEquals(0.1, l1.getL1(), 1e-6); assertEquals(0.2, l1.getL2(), 1e-6); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l1.getGradientNormalization()); @@ -146,9 +147,8 @@ public void regressionTestCNN1() throws Exception { assertEquals(3, l0.getNIn()); assertEquals(3, l0.getNOut()); assertEquals(WeightInit.RELU, l0.getWeightInit()); - assertEquals(Updater.RMSPROP, l0.getUpdater()); - assertEquals(0.96, l0.getRmsDecay(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); + assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); + assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); assertArrayEquals(new int[] {2, 2}, l0.getKernelSize()); assertArrayEquals(new int[] {1, 1}, l0.getStride()); assertArrayEquals(new int[] {0, 0}, l0.getPadding()); @@ -168,9 +168,8 @@ public void regressionTestCNN1() throws Exception { assertEquals(26 * 26 * 3, l2.getNIn()); assertEquals(5, l2.getNOut()); assertEquals(WeightInit.RELU, l0.getWeightInit()); - assertEquals(Updater.RMSPROP, l0.getUpdater()); - assertEquals(0.96, l0.getRmsDecay(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); + assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); + assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java index 00bb3b80d83b..f097576c5409 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java @@ -5,6 +5,7 @@ import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.graph.LayerVertex; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; @@ -58,7 +59,7 @@ 
public void regressionTestMLP1() throws Exception { assertTrue(l0.getIUpdater() instanceof Nesterovs); Nesterovs n = (Nesterovs) l0.getIUpdater(); assertEquals(0.9, n.getMomentum(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); + assertEquals(0.15, ((Nesterovs)l0.getIUpdater()).getLearningRate(), 1e-6); assertEquals(0.15, n.getLearningRate(), 1e-6); @@ -69,9 +70,8 @@ public void regressionTestMLP1() throws Exception { assertEquals(5, l1.getNOut()); assertEquals(WeightInit.XAVIER, l1.getWeightInit()); assertTrue(l1.getIUpdater() instanceof Nesterovs); - n = (Nesterovs) l1.getIUpdater(); - assertEquals(0.9, n.getMomentum(), 1e-6); - assertEquals(0.15, l1.getLearningRate(), 1e-6); + assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6); + assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6); assertEquals(0.15, n.getLearningRate(), 1e-6); int numParams = net.numParams(); @@ -104,8 +104,8 @@ public void regressionTestMLP2() throws Exception { RmsProp r = (RmsProp) l0.getIUpdater(); assertEquals(0.96, r.getRmsDecay(), 1e-6); assertEquals(0.15, r.getLearningRate(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); - assertEquals(0.6, l0.getDropOut(), 1e-6); + assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); + assertEquals(new Dropout(0.6), l0.getIDropout()); assertEquals(0.1, l0.getL1(), 1e-6); assertEquals(0.2, l0.getL2(), 1e-6); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l0.getGradientNormalization()); @@ -122,8 +122,8 @@ public void regressionTestMLP2() throws Exception { r = (RmsProp) l1.getIUpdater(); assertEquals(0.96, r.getRmsDecay(), 1e-6); assertEquals(0.15, r.getLearningRate(), 1e-6); - assertEquals(0.15, l1.getLearningRate(), 1e-6); - assertEquals(0.6, l1.getDropOut(), 1e-6); + assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); + assertEquals(new Dropout(0.6), l1.getIDropout()); assertEquals(0.1, l1.getL1(), 1e-6); assertEquals(0.2, l1.getL2(), 1e-6); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l1.getGradientNormalization()); @@ -158,7 +158,7 @@ public void regressionTestCNN1() throws Exception { RmsProp r = (RmsProp) l0.getIUpdater(); assertEquals(0.96, r.getRmsDecay(), 1e-6); assertEquals(0.15, r.getLearningRate(), 1e-6); - assertEquals(0.15, l0.getLearningRate(), 1e-6); + assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); assertArrayEquals(new int[] {2, 2}, l0.getKernelSize()); assertArrayEquals(new int[] {1, 1}, l0.getStride()); assertArrayEquals(new int[] {0, 0}, l0.getPadding()); diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java index 3e1452e96faf..6f5856d20041 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java @@ -16,6 +16,7 @@ import org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.io.ClassPathResource; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.io.BufferedOutputStream; @@ -58,8 +59,8 @@ public void testLoadNormalizers() throws Exception { int nIn = 5; int nOut = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l1(0.01) - 
.l2(0.01).learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01).l2(0.01) + .updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) @@ -89,8 +90,8 @@ public void testModelGuesserDl4jModel() throws Exception { int nIn = 5; int nOut = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l1(0.01) - .l2(0.01).learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01) + .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) diff --git a/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java b/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java index 1489fc681d80..b3acbd24ceb2 100644 --- a/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java +++ b/deeplearning4j-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java @@ -18,6 +18,7 @@ import org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler; import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.io.File; @@ -37,8 +38,8 @@ public void testWriteMLNModel() throws Exception { int nIn = 5; int nOut = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l1(0.01) - .l2(0.01).learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01) + .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) @@ -64,8 +65,8 @@ public void testWriteMlnModelInputStream() throws Exception { int nIn = 5; int nOut = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l1(0.01) - .l2(0.01).learningRate(0.1).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01) + .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) @@ -108,7 +109,7 @@ public void testWriteMlnModelInputStream() throws Exception { @Test public void testWriteCGModel() throws 
Exception { ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.1) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)) .graphBuilder().addInputs("in") .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) @@ -134,7 +135,7 @@ public void testWriteCGModel() throws Exception { @Test public void testWriteCGModelInputStream() throws Exception { ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.1) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)) .graphBuilder().addInputs("in") .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) @@ -166,7 +167,7 @@ private DataSet trivialDataSet() { private ComputationGraph simpleComputationGraph() { ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.1) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)) .graphBuilder().addInputs("in") .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java index 0daf03d8b3c2..9acc3f5386a3 100644 --- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java +++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java @@ -19,7 +19,6 @@ import lombok.extern.slf4j.Slf4j; import org.bytedeco.javacpp.Pointer; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer.AlgoMode; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer.BwdDataAlgo; @@ -41,6 +40,7 @@ import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.jcublas.context.CudaContext; +import org.nd4j.linalg.primitives.Pair; import static org.bytedeco.javacpp.cuda.CUstream_st; import static org.bytedeco.javacpp.cudnn.*; diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/CudnnSubsamplingHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/CudnnSubsamplingHelper.java index 348657b7a81d..fdb8bde0e74b 100644 --- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/CudnnSubsamplingHelper.java +++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/CudnnSubsamplingHelper.java @@ -19,7 +19,6 @@ import lombok.extern.slf4j.Slf4j; import org.bytedeco.javacpp.Pointer; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.layers.PoolingType; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -36,6 +35,7 @@ import 
org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.jcublas.context.CudaContext; +import org.nd4j.linalg.primitives.Pair; import static org.bytedeco.javacpp.cuda.CUstream_st; import static org.bytedeco.javacpp.cudnn.*; diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnBatchNormalizationHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnBatchNormalizationHelper.java index 3596f2a70be2..088688186736 100644 --- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnBatchNormalizationHelper.java +++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnBatchNormalizationHelper.java @@ -19,7 +19,6 @@ import lombok.extern.slf4j.Slf4j; import org.bytedeco.javacpp.Pointer; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseCudnnHelper; @@ -32,6 +31,7 @@ import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.jcublas.context.CudaContext; +import org.nd4j.linalg.primitives.Pair; import static org.bytedeco.javacpp.cuda.CUstream_st; import static org.bytedeco.javacpp.cudnn.*; diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnLocalResponseNormalizationHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnLocalResponseNormalizationHelper.java index 813d567bb613..cf292aaa8248 100644 --- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnLocalResponseNormalizationHelper.java +++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnLocalResponseNormalizationHelper.java @@ -19,7 +19,6 @@ import lombok.extern.slf4j.Slf4j; import org.bytedeco.javacpp.Pointer; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseCudnnHelper; @@ -31,6 +30,7 @@ import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.jcublas.context.CudaContext; +import org.nd4j.linalg.primitives.Pair; import static org.bytedeco.javacpp.cuda.CUstream_st; import static org.bytedeco.javacpp.cudnn.*; diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/recurrent/CudnnLSTMHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/recurrent/CudnnLSTMHelper.java index 80b8308e0859..02ab6f2c330f 100644 --- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/recurrent/CudnnLSTMHelper.java +++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/recurrent/CudnnLSTMHelper.java @@ -19,7 +19,6 @@ import lombok.extern.slf4j.Slf4j; import org.bytedeco.javacpp.Pointer; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -34,6 +33,7 @@ import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.jcublas.context.CudaContext; +import org.nd4j.linalg.primitives.Pair; import java.util.Map; diff --git a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/convolution/TestConvolution.java 
b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/convolution/TestConvolution.java index 074fb0c9d041..6f60ac5e33da 100644 --- a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/convolution/TestConvolution.java +++ b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/convolution/TestConvolution.java @@ -1,6 +1,5 @@ package org.deeplearning4j.convolution; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; @@ -16,7 +15,9 @@ import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.primitives.Pair; import java.lang.reflect.Field; import java.util.Arrays; @@ -83,8 +84,8 @@ public void testCompareCudnnStandardOutputsVsMode() throws Exception { l = new SubsamplingLayer.Builder().kernelSize(4, 4).stride(2, 2).build(); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).regularization(true) - .l2(0.0005).learningRate(.01).weightInit(WeightInit.XAVIER).convolutionMode(c).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + .l2(0.0005).updater(new Sgd(0.01)).weightInit(WeightInit.XAVIER).convolutionMode(c).list() .layer(0, l) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(10).activation(Activation.SOFTMAX).build()) diff --git a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CuDNNGradientChecks.java b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CuDNNGradientChecks.java index e691dcc11868..9854beba2a83 100644 --- a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CuDNNGradientChecks.java +++ b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CuDNNGradientChecks.java @@ -4,7 +4,6 @@ import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -26,6 +25,7 @@ import org.nd4j.linalg.api.buffer.util.DataTypeUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.lang.reflect.Field; @@ -56,7 +56,7 @@ public void testConvolutional() throws Exception { // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) - String[] activFns = {"sigmoid", "tanh"}; + Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first int[] minibatchSizes = {1, 4}; @@ -69,7 +69,7 @@ public void testConvolutional() throws Exception { f.setAccessible(true); Random r = new Random(12345); - for (String afn : activFns) { + for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int minibatchSize : minibatchSizes) { @@ -79,10 +79,10 @@ public void testConvolutional() throws Exception { 
labels.putScalar(i, r.nextInt(nOut), 1.0); } - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().regularization(false) + MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-1, 1)) - .updater(Updater.NONE).seed(12345L).list() + .updater(new NoOp()).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(1, 1).nOut(3) .activation(afn).build()) .layer(1, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(0, 0).nOut(3) @@ -172,9 +172,9 @@ public void testConvolutionalNoBias() throws Exception { labels.putScalar(i, r.nextInt(nOut), 1.0); } - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().regularization(false) + MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-1, 1)) - .updater(Updater.NONE).seed(12345L) + .updater(new NoOp()).seed(12345L) .list() .layer(0, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(1, 1).nOut(3) .hasBias(convHasBias) @@ -235,8 +235,8 @@ public void testBatchNormCnn() throws Exception { labels.putScalar(i, r.nextInt(nOut), 1.0); } - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0) - .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp()) + .seed(12345L).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 2)).list() .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2) .activation(Activation.IDENTITY).build()) @@ -289,8 +289,8 @@ public void testLRN() throws Exception { labels.putScalar(i, r.nextInt(nOut), 1.0); } - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0) - .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp()) + .seed(12345L).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 2)).list() .layer(0, new ConvolutionLayer.Builder().nOut(6).kernelSize(2, 2).stride(1, 1) .activation(Activation.TANH).build()) @@ -346,8 +346,8 @@ public void testLSTM() throws Exception { } } - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0) - .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).seed(12345L).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 2)).list() .layer(0, new LSTM.Builder().nIn(input.size(1)).nOut(lstmLayerSize) .gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build()) @@ -403,8 +403,8 @@ public void testLSTM2() throws Exception { } } - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0) - .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION) + MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() + .updater(new NoOp()).seed(12345L).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 2)).list() .layer(0, new 
LSTM.Builder().nIn(input.size(1)).nOut(lstmLayerSize) .gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build()) @@ -480,7 +480,7 @@ public void testCnnDilated() throws Exception { } NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345) - .learningRate(1.0).updater(Updater.SGD) + .updater(new NoOp()) .activation(Activation.TANH).convolutionMode(cm).list() .layer(new ConvolutionLayer.Builder().name("layer 0") .kernelSize(k, k) diff --git a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/lstm/ValidateCudnnLSTM.java b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/lstm/ValidateCudnnLSTM.java index 359ed0422a8e..c40b2c0c0534 100644 --- a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/lstm/ValidateCudnnLSTM.java +++ b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/lstm/ValidateCudnnLSTM.java @@ -14,6 +14,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.lang.reflect.Field; @@ -46,8 +47,8 @@ public void validateImplSimple() throws Exception { } MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().inferenceWorkspaceMode(WorkspaceMode.NONE) - .trainingWorkspaceMode(WorkspaceMode.NONE).learningRate(1.0).regularization(false) - .updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION) + .trainingWorkspaceMode(WorkspaceMode.NONE).updater(new NoOp()) + .seed(12345L).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 2)).list() .layer(0, new LSTM.Builder().nIn(input.size(1)).nOut(lstmLayerSize) .gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build()) @@ -123,9 +124,9 @@ public void validateImplMultiLayer() throws Exception { } } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()) .inferenceWorkspaceMode(WorkspaceMode.NONE).trainingWorkspaceMode(WorkspaceMode.NONE) - .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION) + .seed(12345L).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 2)).list() .layer(0, new LSTM.Builder().nIn(input.size(1)).nOut(lstmLayerSize) .gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build()) @@ -207,9 +208,9 @@ public void validateImplMultiLayerTBPTT() throws Exception { int tbpttLength = 5; int nOut = 2; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()) .inferenceWorkspaceMode(WorkspaceMode.NONE).trainingWorkspaceMode(WorkspaceMode.NONE) - .regularization(false).updater(Updater.NONE).seed(12345L).weightInit(WeightInit.DISTRIBUTION) + .seed(12345L).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 2)).list() .layer(0, new LSTM.Builder().nIn(inputSize).nOut(lstmLayerSize) .gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build()) @@ -274,9 +275,9 @@ public void validateImplMultiLayerRnnTimeStep() throws Exception { int tbpttLength = 5; int nOut = 2; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(1.0) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()) 
.inferenceWorkspaceMode(WorkspaceMode.NONE).trainingWorkspaceMode(WorkspaceMode.NONE) - .cacheMode(CacheMode.NONE).regularization(false).updater(Updater.NONE).seed(12345L) + .cacheMode(CacheMode.NONE).seed(12345L) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 2)).list() .layer(0, new LSTM.Builder().nIn(inputSize).nOut(lstmLayerSize) .gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build()) diff --git a/deeplearning4j-graph/src/main/java/org/deeplearning4j/graph/models/embeddings/GraphVectorsImpl.java b/deeplearning4j-graph/src/main/java/org/deeplearning4j/graph/models/embeddings/GraphVectorsImpl.java index ec4cf5bbe04f..ac90ab0028dd 100644 --- a/deeplearning4j-graph/src/main/java/org/deeplearning4j/graph/models/embeddings/GraphVectorsImpl.java +++ b/deeplearning4j-graph/src/main/java/org/deeplearning4j/graph/models/embeddings/GraphVectorsImpl.java @@ -2,7 +2,6 @@ import lombok.AllArgsConstructor; import lombok.NoArgsConstructor; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.graph.api.IGraph; import org.deeplearning4j.graph.api.Vertex; import org.deeplearning4j.graph.models.GraphVectors; @@ -10,6 +9,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; import java.util.Comparator; import java.util.PriorityQueue; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/Hdf5Archive.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/Hdf5Archive.java index a8f9cd869501..765a837f55a1 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/Hdf5Archive.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/Hdf5Archive.java @@ -30,13 +30,12 @@ import org.nd4j.shade.jackson.databind.ObjectMapper; import java.io.IOException; +import java.lang.Exception; import java.util.ArrayList; import java.util.List; import static org.bytedeco.javacpp.hdf5.*; -import java.lang.Exception; - /** * Class for reading ND4J arrays and JSON strings from HDF5 * achive files. 
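For context, the test-configuration changes above all replace the removed .learningRate(...), .regularization(false) and .updater(Updater.NONE) calls with a single .updater(new NoOp()), so gradient checks and CuDNN validation run with an updater that leaves parameters untouched. Below is a minimal, illustrative sketch of the resulting builder idiom; the single LSTM layer and the layer sizes are placeholders, not values taken from this patch.

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.NoOp;

public class NoOpUpdaterSketch {
    public static void main(String[] args) {
        // Old style (removed in this patch): .learningRate(1.0).regularization(false).updater(Updater.NONE)
        // New style: pass an IUpdater instance; NoOp performs no parameter updates during gradient checks.
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .updater(new NoOp())
                .seed(12345L)
                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 2))
                .list()
                .layer(0, new LSTM.Builder().nIn(3).nOut(4)
                        .gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
                .build();
        System.out.println("Config built: " + (conf.toJson().length() > 0));
    }
}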
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java index b054ba8a176d..f776fa99a7db 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java @@ -27,7 +27,6 @@ import org.deeplearning4j.nn.conf.graph.PreprocessorVertex; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.graph.ComputationGraph; - import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.KerasModelConfiguration; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; @@ -40,7 +39,10 @@ import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import static org.deeplearning4j.nn.modelimport.keras.KerasLayer.DimOrder; import static org.deeplearning4j.nn.modelimport.keras.KerasLayer.customLayers; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java index 37e77177eb0d..12c44eefa78a 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java @@ -28,12 +28,15 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.layers.KerasInput; -import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelBuilder; +import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; /** * Build DL4J MultiLayerNetwork model from Keras Sequential diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java index ab418844da64..e4bd4033f3ea 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java @@ -67,6 +67,7 @@ public class KerasLayerConfiguration { private final String LAYER_CLASS_NAME_CONVOLUTION_1D = ""; // 1: Convolution1D, 2: Conv1D private final String LAYER_CLASS_NAME_CONVOLUTION_2D = ""; // 1: Convolution2D, 2: Conv2D private final String LAYER_CLASS_NAME_LEAKY_RELU = "LeakyReLU"; + private final String LAYER_CLASS_NAME_UPSAMPLING_1D = "UpSampling1D"; private final String LAYER_CLASS_NAME_UPSAMPLING_2D = "UpSampling2D"; diff --git 
a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasInput.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasInput.java index 67584d4b7e03..b4c51eff13c8 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasInput.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasInput.java @@ -3,8 +3,8 @@ import lombok.Data; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import java.util.ArrayList; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java index 59125b1bf68c..e2a4e28d526f 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java @@ -4,13 +4,12 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.LossLayer; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.ArrayList; -import java.util.Map; import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLossUtils.mapLossFunction; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasLeakyReLU.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasLeakyReLU.java index 5fc1b8dc71bb..5da8ab74c585 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasLeakyReLU.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasLeakyReLU.java @@ -26,7 +26,6 @@ import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationLReLU; - import java.util.Map; /** diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java index 9377216622d1..1d35c3d49a50 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java @@ -20,10 +20,8 @@ import lombok.Data; import lombok.extern.slf4j.Slf4j; import 
org.apache.commons.lang3.ArrayUtils; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; import org.nd4j.linalg.api.ndarray.INDArray; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling1D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling1D.java new file mode 100644 index 000000000000..bf3887b98ec0 --- /dev/null +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling1D.java @@ -0,0 +1,95 @@ +/*- + * + * * Copyright 2017 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ +package org.deeplearning4j.nn.modelimport.keras.layers.convolutional; + +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.Upsampling1D; +import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; +import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; + +import java.util.Map; + + +/** + * Keras Upsampling1D layer support + * + * @author Max Pumperla + */ +public class KerasUpsampling1D extends KerasLayer { + + /** + * Constructor from parsed Keras layer configuration dictionary. + * + * @param layerConfig dictionary containing Keras layer configuration. + * @throws InvalidKerasConfigurationException Invalid Keras configuration exception + * @throws UnsupportedKerasConfigurationException Unsupported Keras configuration exception + */ + public KerasUpsampling1D(Map layerConfig) + throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { + this(layerConfig, true); + } + + /** + * Constructor from parsed Keras layer configuration dictionary. 
+ * + * @param layerConfig dictionary containing Keras layer configuration + * @param enforceTrainingConfig whether to enforce training-related configuration options + * @throws InvalidKerasConfigurationException Invalid Keras configuration exception + * @throws UnsupportedKerasConfigurationException Unsupported Keras configuration exception + */ + public KerasUpsampling1D(Map layerConfig, boolean enforceTrainingConfig) + throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { + super(layerConfig, enforceTrainingConfig); + + int[] size = KerasConvolutionUtils.getUpsamplingSizeFromConfig(layerConfig, 1, conf); + + Upsampling1D.Builder builder = new Upsampling1D.Builder() + .name(this.layerName) + .dropOut(this.dropout) + .size(size[0]); + + this.layer = builder.build(); + this.vertex = null; + } + + /** + * Get DL4J Upsampling1D layer. + * + * @return Upsampling1D layer + */ + public Upsampling1D getUpsampling1DLayer() { + return (Upsampling1D) this.layer; + } + + /** + * Get layer output type. + * + * @param inputType Array of InputTypes + * @return output type as InputType + * @throws InvalidKerasConfigurationException + */ + @Override + public InputType getOutputType(InputType... inputType) throws InvalidKerasConfigurationException { + if (inputType.length > 1) + throw new InvalidKerasConfigurationException( + "Keras Upsampling 1D layer accepts only one input (received " + inputType.length + ")"); + return this.getUpsampling1DLayer().getOutputType(-1, inputType[0]); + } + +} diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java index 5679b47acf71..1a187ec89611 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java @@ -8,7 +8,6 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; - import java.util.Map; import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getZeroPaddingFromConfig; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java index d5a97d093257..5477179d02c9 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java @@ -4,8 +4,8 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ZeroPaddingLayer; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import java.util.Map;
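To show how the new KerasUpsampling1D mapping is intended to be used, here is a minimal, illustrative sketch; it follows the same pattern as the KerasUpsampling1DTest added later in this patch, and the size value and layer name are arbitrary placeholders.

import java.util.HashMap;
import java.util.Map;

import org.deeplearning4j.nn.conf.layers.Upsampling1D;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasUpsampling1D;

public class KerasUpsampling1DSketch {
    public static void main(String[] args) throws Exception {
        KerasLayerConfiguration conf = new Keras2LayerConfiguration();

        // Inner "config" dictionary, as it would appear after parsing the Keras JSON
        Map<String, Object> config = new HashMap<>();
        config.put(conf.getLAYER_FIELD_UPSAMPLING_1D_SIZE(), 4);
        config.put(conf.getLAYER_FIELD_NAME(), "upsampling_1d");

        // Outer layer dictionary wrapping class name, config and Keras version
        Map<String, Object> layerConfig = new HashMap<>();
        layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_UPSAMPLING_1D());
        layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config);
        layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), 2);

        // The importer maps this configuration onto a DL4J Upsampling1D layer
        Upsampling1D layer = new KerasUpsampling1D(layerConfig).getUpsampling1DLayer();
        System.out.println(layer.getLayerName() + ", size = " + layer.getSize());
    }
}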
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivation.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivation.java index f485986a9b77..4e4ebc719311 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivation.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivation.java @@ -3,13 +3,14 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ActivationLayer; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; -import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.getActivationFromConfig; import java.util.Map; +import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.getActivationFromConfig; + /** * Imports an Activation layer from Keras. * diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java index 16011b37deb7..564bb0252973 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java @@ -6,8 +6,8 @@ import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.params.DefaultParamInitializer; @@ -17,10 +17,10 @@ import java.util.Map; import java.util.Set; +import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.getActivationFromConfig; import static org.deeplearning4j.nn.modelimport.keras.utils.KerasInitilizationUtils.getWeightInitFromConfig; import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils.getHasBiasFromConfig; import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils.getNOutFromConfig; -import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.getActivationFromConfig; /** * Imports a Dense layer from Keras. 
diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropout.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropout.java index bbc6c35f8adf..15dd41aa609c 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropout.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropout.java @@ -3,8 +3,8 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.DropoutLayer; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import java.util.Map; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasFlatten.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasFlatten.java index f03fe2e07266..a1a81f1e3f08 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasFlatten.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasFlatten.java @@ -6,8 +6,8 @@ import org.deeplearning4j.nn.conf.inputs.InputType.InputTypeConvolutional; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.preprocessors.TensorFlowCnnToFeedForwardPreProcessor; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMerge.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMerge.java index e181c8da6b8e..c2a133a95948 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMerge.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMerge.java @@ -5,8 +5,8 @@ import org.deeplearning4j.nn.conf.graph.ElementWiseVertex; import org.deeplearning4j.nn.conf.graph.MergeVertex; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshape.java 
b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshape.java index a149016f4d82..c53620f9fe46 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshape.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshape.java @@ -27,7 +27,6 @@ import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; import org.nd4j.linalg.util.ArrayUtil; - import java.util.List; import java.util.Map; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java index a8c428e10e59..f918e2ef0c57 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java @@ -3,8 +3,8 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.LocalResponseNormalization; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasPoolHelper.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasPoolHelper.java index 94f467132e89..20dec069a3f9 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasPoolHelper.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasPoolHelper.java @@ -3,8 +3,8 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.graph.PoolHelperVertex; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import java.util.Map; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java index ab1d1abf5903..8d993342b85d 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java @@ -6,15 +6,14 @@ import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.EmbeddingLayer; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import 
org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.factory.Nd4j; import java.util.HashMap; import java.util.Map; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java index d3b19feb003c..61e1b3770976 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java @@ -5,8 +5,8 @@ import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.BatchNormalization; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; @@ -93,8 +93,7 @@ public KerasBatchNormalization(Map layerConfig, boolean enforceT layerConfig, conf.getLAYER_FIELD_BATCHNORMALIZATION_GAMMA_CONSTRAINT(), conf, kerasMajorVersion); BatchNormalization.Builder builder = new BatchNormalization.Builder().name(this.layerName).dropOut(this.dropout).minibatch(true) - .lockGammaBeta(false).eps(getEpsFromConfig(layerConfig)) - .momentum(getMomentumFromConfig(layerConfig)); + .lockGammaBeta(false).eps(getEpsFromConfig(layerConfig)); if (betaConstraint != null) builder.constrainBeta(betaConstraint); if (gammaConstraint != null) diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java index 54598ccec4da..c1c8cb78531b 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java @@ -23,8 +23,8 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer; import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import 
org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import java.util.Map; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java index 666439c72cce..7105c1034076 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java @@ -20,17 +20,14 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Subsampling1DLayer; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; -import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getConvolutionModeFromConfig; -import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getKernelSizeFromConfig; -import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getStrideFromConfig; -import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getPaddingFromBorderModeConfig; - import java.util.Map; +import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.*; + /** * Imports a Keras 1D Pooling layer as a DL4J Subsampling layer. 
* diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java index a65b85d8d0ea..93ec62865e1c 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java @@ -20,17 +20,14 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; -import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getConvolutionModeFromConfig; -import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getKernelSizeFromConfig; -import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getStrideFromConfig; -import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.getPaddingFromBorderModeConfig; - import java.util.Map; +import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.*; + /** * Imports a Keras 2D Pooling layer as a DL4J Subsampling layer. * diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLstm.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLstm.java index 6168b6be5731..384cbd84136c 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLstm.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLstm.java @@ -3,10 +3,12 @@ import lombok.Data; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.layers.LayerConstraint; +import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.LSTM; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; +import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; @@ -22,10 +24,10 @@ import java.util.Map; import java.util.Set; +import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.getActivationFromConfig; import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.mapActivation; import static org.deeplearning4j.nn.modelimport.keras.utils.KerasInitilizationUtils.getWeightInitFromConfig; import static 
org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils.getNOutFromConfig; -import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.getActivationFromConfig; /** * Imports a Keras LSTM layer as a DL4J LSTM layer. @@ -163,7 +165,11 @@ public InputType getOutputType(InputType... inputType) throws InvalidKerasConfig if (inputType.length > 1) throw new InvalidKerasConfigurationException( "Keras LSTM layer accepts only one input (received " + inputType.length + ")"); - return this.getLSTMLayer().getOutputType(-1, inputType[0]); + InputPreProcessor preProcessor = getInputPreprocessor(inputType); + if (preProcessor != null) + return preProcessor.getOutputType(inputType[0]); + else + return this.getLSTMLayer().getOutputType(-1, inputType[0]); } /** @@ -176,6 +182,28 @@ public int getNumParams() { return kerasMajorVersion == 2 ? NUM_TRAINABLE_PARAMS_KERAS_2 : NUM_TRAINABLE_PARAMS; } + /** + * Gets appropriate DL4J InputPreProcessor for given InputTypes. + * + * @param inputType Array of InputTypes + * @return DL4J InputPreProcessor + * @throws InvalidKerasConfigurationException Invalid Keras configuration exception + * @see org.deeplearning4j.nn.conf.InputPreProcessor + */ + @Override + public InputPreProcessor getInputPreprocessor(InputType... inputType) throws InvalidKerasConfigurationException { + if (inputType.length > 1) + throw new InvalidKerasConfigurationException( + "Keras LSTM layer accepts only one input (received " + inputType.length + ")"); + InputPreProcessor preprocessor = null; + if (inputType[0] instanceof InputType.InputTypeFeedForward) { + preprocessor = new FeedForwardToRnnPreProcessor(); + } + return preprocessor; + } + + + /** * Set weights for layer. * diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/trainedmodels/TrainedModelHelper.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/trainedmodels/TrainedModelHelper.java index 9e0aeeab3c79..6bf6742fb05e 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/trainedmodels/TrainedModelHelper.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/trainedmodels/TrainedModelHelper.java @@ -2,8 +2,8 @@ import org.apache.commons.io.FileUtils; import org.deeplearning4j.nn.graph.ComputationGraph; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.KerasModelImport; +import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java index 490e8c9064cf..64a10c3f2808 100644 --- a/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java +++ b/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java @@ -23,7 +23,7 @@ import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import 
org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; -import org.deeplearning4j.nn.modelimport.keras.layers.*; +import org.deeplearning4j.nn.modelimport.keras.layers.KerasInput; import org.deeplearning4j.nn.modelimport.keras.layers.advanced.activations.KerasLeakyReLU; import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.*; import org.deeplearning4j.nn.modelimport.keras.layers.core.*; diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java index 8c36c7d5c451..c53365487c54 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java @@ -38,15 +38,15 @@ public class Keras1ModelConfigurationTest { private ClassLoader classLoader = getClass().getClassLoader(); -// @Test -// public void imdbLstmTfSequentialConfigTest() throws Exception { -// runSequentialConfigTest("configs/keras1/imdb_lstm_tf_keras_1_config.json"); -// } -// -// @Test -// public void imdbLstmThSequentialConfigTest() throws Exception { -// runSequentialConfigTest("configs/keras1/imdb_lstm_th_keras_1_config.json"); -// } + @Test + public void imdbLstmTfSequentialConfigTest() throws Exception { + runSequentialConfigTest("configs/keras1/imdb_lstm_tf_keras_1_config.json", true); + } + + @Test + public void imdbLstmThSequentialConfigTest() throws Exception { + runSequentialConfigTest("configs/keras1/imdb_lstm_th_keras_1_config.json", true); + } @Test public void mnistMlpTfSequentialConfigTest() throws Exception { diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java index a59de366c2aa..8e3c4eb8d560 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java @@ -38,15 +38,15 @@ public class Keras2ModelConfigurationTest { ClassLoader classLoader = getClass().getClassLoader(); -// @Test -// public void imdbLstmTfSequentialConfigTest() throws Exception { -// runSequentialConfigTest("configs/keras2/imdb_lstm_tf_keras_2_config.json"); -// } -// -// @Test -// public void imdbLstmThSequentialConfigTest() throws Exception { -// runSequentialConfigTest("configs/keras2/imdb_lstm_th_keras_2_config.json"); -// } + @Test + public void imdbLstmTfSequentialConfigTest() throws Exception { + runSequentialConfigTest("configs/keras2/imdb_lstm_tf_keras_2_config.json"); + } + + @Test + public void imdbLstmThSequentialConfigTest() throws Exception { + runSequentialConfigTest("configs/keras2/imdb_lstm_th_keras_2_config.json"); + } @Test public void mnistMlpTfSequentialConfigTest() throws Exception { diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java 
b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java index ea14c4662705..027ee832bb29 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java @@ -17,9 +17,6 @@ */ package org.deeplearning4j.nn.modelimport.keras.configurations; -import java.io.File; -import java.io.IOException; - import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.modelimport.keras.KerasModelImport; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; @@ -27,6 +24,9 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.Test; +import java.io.File; +import java.io.IOException; + /** * Test import of Keras models. * diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLayerTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLayerTest.java index 94f27219fb46..bc8f6cb509c8 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLayerTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLayerTest.java @@ -25,7 +25,6 @@ import org.deeplearning4j.nn.modelimport.keras.layers.custom.KerasLRN; import org.deeplearning4j.nn.modelimport.keras.layers.custom.KerasPoolHelper; import org.deeplearning4j.util.ModelSerializer; -import org.junit.Test; import java.io.File; import java.net.URL; diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java index a6a2503085cc..ae83c8f30650 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java @@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution; import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; import org.deeplearning4j.nn.conf.layers.PoolingType; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; @@ -95,7 +96,7 @@ public void buildAtrousConvolution1DLayer(KerasLayerConfiguration conf, Integer assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, layer.getL1(), 0.0); assertEquals(L2_REGULARIZATION, layer.getL2(), 0.0); - assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); assertEquals(KERNEL_SIZE[0], layer.getKernelSize()[0]); assertEquals(STRIDE[0], layer.getStride()[0]); assertEquals(N_OUT, layer.getNOut()); diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java index afe3732a9f69..b6026eae9c7d 100644 
--- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java @@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution; import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.PoolingType; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; @@ -113,7 +114,7 @@ public void buildAtrousConvolution2DLayer(KerasLayerConfiguration conf, Integer assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, layer.getL1(), 0.0); assertEquals(L2_REGULARIZATION, layer.getL2(), 0.0); - assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); assertArrayEquals(KERNEL_SIZE, layer.getKernelSize()); assertArrayEquals(STRIDE, layer.getStride()); assertEquals(N_OUT, layer.getNOut()); diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java index c2d5378a6cd6..58d67803569e 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java @@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution; import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; import org.deeplearning4j.nn.conf.layers.PoolingType; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; @@ -102,7 +103,7 @@ public void buildConvolution1DLayer(KerasLayerConfiguration conf, Integer kerasV assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, layer.getL1(), 0.0); assertEquals(L2_REGULARIZATION, layer.getL2(), 0.0); - assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); assertEquals(KERNEL_SIZE[0], layer.getKernelSize()[0]); assertEquals(STRIDE[0], layer.getStride()[0]); assertEquals(N_OUT, layer.getNOut()); diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java index 3c9b7cec9da3..357a2133d1b3 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java @@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution; import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.PoolingType; import 
org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; @@ -122,7 +123,7 @@ public void buildConvolution2DLayer(KerasLayerConfiguration conf, Integer kerasV assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, layer.getL1(), 0.0); assertEquals(L2_REGULARIZATION, layer.getL2(), 0.0); - assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); assertArrayEquals(KERNEL_SIZE, layer.getKernelSize()); assertArrayEquals(STRIDE, layer.getStride()); assertEquals(N_OUT, layer.getNOut()); diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java new file mode 100644 index 000000000000..2fa9b1e07a39 --- /dev/null +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java @@ -0,0 +1,69 @@ +/*- + * + * * Copyright 2017 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ +package org.deeplearning4j.nn.modelimport.keras.layers.convolution; + +import org.deeplearning4j.nn.conf.layers.Upsampling1D; +import org.deeplearning4j.nn.conf.layers.Upsampling2D; +import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; +import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; +import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; +import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasUpsampling1D; +import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasUpsampling2D; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +/** + * @author Max Pumperla + */ +public class KerasUpsampling1DTest { + + private final String LAYER_NAME = "upsampling_1D_layer"; + private int size = 4; + + private Integer keras1 = 1; + private Integer keras2 = 2; + private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration(); + private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration(); + + @Test + public void testUpsampling1DLayer() throws Exception { + buildUpsampling1DLayer(conf1, keras1); + buildUpsampling1DLayer(conf2, keras2); + } + + public void buildUpsampling1DLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws Exception { + Map layerConfig = new HashMap<>(); + layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_UPSAMPLING_1D()); + Map config = new HashMap<>(); + config.put(conf.getLAYER_FIELD_UPSAMPLING_1D_SIZE(), size); + config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME); + layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config); + layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); + + Upsampling1D 
layer = new KerasUpsampling1D(layerConfig).getUpsampling1DLayer(); + assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(size, layer.getSize()); + } + +} diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java index ddac2e7e6c5b..2251523c91d6 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java @@ -55,7 +55,7 @@ public void testUpsampling2DLayer() throws Exception { public void buildUpsampling2DLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws Exception { Map layerConfig = new HashMap<>(); - layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_MAX_POOLING_1D()); + layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_UPSAMPLING_2D()); Map config = new HashMap<>(); List sizeList = new ArrayList<>(); sizeList.add(size[0]); diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java index 8a7a69916eae..7bb3e258b682 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java @@ -17,6 +17,7 @@ */ package org.deeplearning4j.nn.modelimport.keras.layers.core; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; @@ -85,7 +86,7 @@ void buildDenseLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, layer.getL1(), 0.0); assertEquals(L2_REGULARIZATION, layer.getL2(), 0.0); - assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); assertEquals(N_OUT, layer.getNOut()); } } diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropoutTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropoutTest.java index 4f55b22db3ab..4e27ef4d32fc 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropoutTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropoutTest.java @@ -17,6 +17,7 @@ */ package org.deeplearning4j.nn.modelimport.keras.layers.core; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.layers.DropoutLayer; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; @@ -61,7 +62,7 @@ public void buildDropoutLayer(KerasLayerConfiguration conf, Integer kerasVersion DropoutLayer layer = new 
KerasDropout(layerConfig).getDropoutLayer(); assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); } diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalizationTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalizationTest.java index fc6d48c87223..f4808c3ce561 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalizationTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalizationTest.java @@ -22,6 +22,7 @@ import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.junit.Test; +import org.nd4j.linalg.learning.config.Nesterovs; import java.util.HashMap; import java.util.Map; @@ -69,6 +70,6 @@ public void buildBatchNormalizationLayer(KerasLayerConfiguration conf, Integer k BatchNormalization layer = new KerasBatchNormalization(layerConfig).getBatchNormalizationLayer(); assertEquals(LAYER_NAME, layer.getLayerName()); assertEquals(epsilon, layer.getEps(), 0.0); - assertEquals(momentum, layer.getMomentum(), 0.0); + assertEquals(momentum, ((Nesterovs)layer.getIUpdater()).getMomentum(), 0.0); } } diff --git a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java index b58924e98e3e..61b81fe964c4 100644 --- a/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java +++ b/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java @@ -17,6 +17,7 @@ */ package org.deeplearning4j.nn.modelimport.keras.layers.recurrent; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.layers.LSTM; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; @@ -101,7 +102,7 @@ void buildLstmLayer(KerasLayerConfiguration conf, Integer kerasVersion) throws E assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, layer.getL1(), 0.0); assertEquals(L2_REGULARIZATION, layer.getL2(), 0.0); - assertEquals(DROPOUT_DL4J, layer.getDropOut(), 0.0); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); assertEquals(lstmForgetBiasDouble, layer.getForgetGateBiasInit(), 0.0); assertEquals(N_OUT, layer.getNOut()); } diff --git a/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_tf_keras_1_config.json b/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_tf_keras_1_config.json index 1d7446d8b4ff..b3301b4b1a0d 100644 --- a/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_tf_keras_1_config.json +++ b/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_tf_keras_1_config.json @@ -36,8 +36,8 @@ "consume_less": "cpu", "stateful": false, "init": "glorot_uniform", - "inner_init": "orthogonal", - "dropout_U": 0.2, + "inner_init": "glorot_uniform", + "dropout_U": 0.0, 
"dropout_W": 0.2, "input_dim": 128, "return_sequences": false, diff --git a/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_th_keras_1_config.json b/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_th_keras_1_config.json index 1d7446d8b4ff..d94a0c7b4bbd 100644 --- a/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_th_keras_1_config.json +++ b/deeplearning4j-modelimport/src/test/resources/configs/keras1/imdb_lstm_th_keras_1_config.json @@ -18,7 +18,7 @@ null ], "W_regularizer": null, - "dropout": 0.2, + "dropout": 0.2, "output_dim": 128, "input_length": null } @@ -36,8 +36,8 @@ "consume_less": "cpu", "stateful": false, "init": "glorot_uniform", - "inner_init": "orthogonal", - "dropout_U": 0.2, + "inner_init": "glorot_uniform", + "dropout_U": 0.0, "dropout_W": 0.2, "input_dim": 128, "return_sequences": false, diff --git a/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_tf_keras_2_config.json b/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_tf_keras_2_config.json index 77e2557de409..a897107f81e9 100644 --- a/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_tf_keras_2_config.json +++ b/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_tf_keras_2_config.json @@ -35,10 +35,12 @@ "recurrent_activation": "hard_sigmoid", "trainable": true, "recurrent_initializer": { - "class_name": "Orthogonal", + "class_name": "VarianceScaling", "config": { - "seed": null, - "gain": 1.0 + "distribution": "uniform", + "scale": 1.0, + "seed": null, + "mode": "fan_avg" } }, "use_bias": true, @@ -53,7 +55,7 @@ "units": 128, "unit_forget_bias": true, "activity_regularizer": null, - "recurrent_dropout": 0.2, + "recurrent_dropout": 0.0, "kernel_initializer": { "class_name": "VarianceScaling", "config": { diff --git a/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_th_keras_2_config.json b/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_th_keras_2_config.json index d79e70c4ea2e..0ecc5c8aefc9 100644 --- a/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_th_keras_2_config.json +++ b/deeplearning4j-modelimport/src/test/resources/configs/keras2/imdb_lstm_th_keras_2_config.json @@ -35,10 +35,12 @@ "recurrent_activation": "hard_sigmoid", "trainable": true, "recurrent_initializer": { - "class_name": "Orthogonal", + "class_name": "VarianceScaling", "config": { - "seed": null, - "gain": 1.0 + "distribution": "uniform", + "scale": 1.0, + "seed": null, + "mode": "fan_avg" } }, "use_bias": true, @@ -53,7 +55,7 @@ "units": 128, "unit_forget_bias": true, "activity_regularizer": null, - "recurrent_dropout": 0.2, + "recurrent_dropout": 0.0, "kernel_initializer": { "class_name": "VarianceScaling", "config": { diff --git a/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/vptree/VpTreeNodeTest.java b/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/vptree/VpTreeNodeTest.java index 3667812b0127..275dfc8292ef 100644 --- a/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/vptree/VpTreeNodeTest.java +++ b/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/vptree/VpTreeNodeTest.java @@ -18,13 +18,12 @@ package org.deeplearning4j.clustering.vptree; -import com.google.common.util.concurrent.AtomicDouble; -import 
org.nd4j.linalg.primitives.Counter; import org.deeplearning4j.clustering.sptree.DataPoint; import org.junit.Test; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Counter; import org.nd4j.linalg.primitives.Pair; import java.util.ArrayList; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/BinarizeTreeTransformer.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/BinarizeTreeTransformer.java index 1e67d20a8bc9..3f4986a9a6a8 100755 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/BinarizeTreeTransformer.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/BinarizeTreeTransformer.java @@ -19,9 +19,9 @@ package org.deeplearning4j.text.corpora.treeparser; import org.apache.commons.lang3.StringUtils; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.layers.feedforward.autoencoder.recursive.Tree; import org.deeplearning4j.text.corpora.treeparser.transformer.TreeTransformer; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeFactory.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeFactory.java index 29fd9fd8ab17..9e05d606a726 100755 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeFactory.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeFactory.java @@ -25,9 +25,9 @@ import org.cleartk.syntax.constituent.type.TreebankNode; import org.cleartk.syntax.constituent.type.TreebankNodeUtil; import org.cleartk.token.type.Token; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.layers.feedforward.autoencoder.recursive.Tree; import org.deeplearning4j.util.MultiDimensionalMap; +import org.nd4j.linalg.primitives.Pair; import java.util.ArrayList; import java.util.Arrays; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeParser.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeParser.java index 1cb92c93187a..f07f4b009174 100755 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeParser.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/main/java/org/deeplearning4j/text/corpora/treeparser/TreeParser.java @@ -29,7 +29,6 @@ import org.cleartk.token.type.Sentence; import org.cleartk.token.type.Token; import org.cleartk.util.ParamUtil; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.layers.feedforward.autoencoder.recursive.Tree; import org.deeplearning4j.text.annotator.PoStagger; import org.deeplearning4j.text.annotator.SentenceAnnotator; @@ -41,6 +40,7 @@ import org.deeplearning4j.text.tokenization.tokenizerfactory.UimaTokenizerFactory; import org.deeplearning4j.util.MultiDimensionalMap; import org.deeplearning4j.util.SetUtils; +import org.nd4j.linalg.primitives.Pair; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/test/java/org/deeplearning4j/util/ContextLabelTest.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/test/java/org/deeplearning4j/util/ContextLabelTest.java index f8ce6a2c6928..a353d7ad5c7d 100755 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/test/java/org/deeplearning4j/util/ContextLabelTest.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/test/java/org/deeplearning4j/util/ContextLabelTest.java @@ -18,12 +18,12 @@ package org.deeplearning4j.util; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.text.movingwindow.ContextLabelRetriever; import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory; import org.deeplearning4j.text.tokenization.tokenizerfactory.UimaTokenizerFactory; import org.junit.Before; import org.junit.Test; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/CnnSentenceDataSetIterator.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/CnnSentenceDataSetIterator.java index 412f353a9348..a4e655a11c8d 100644 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/CnnSentenceDataSetIterator.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/CnnSentenceDataSetIterator.java @@ -2,7 +2,6 @@ import lombok.AllArgsConstructor; import lombok.NonNull; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.iterator.provider.LabelAwareConverter; import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; import org.deeplearning4j.text.documentiterator.LabelAwareDocumentIterator; @@ -20,6 +19,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; import java.util.*; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/CollectionLabeledSentenceProvider.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/CollectionLabeledSentenceProvider.java index b5809d6c35f6..26c25b3bdf0d 100644 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/CollectionLabeledSentenceProvider.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/CollectionLabeledSentenceProvider.java @@ -2,8 +2,8 @@ import lombok.NonNull; import org.datavec.api.util.RandomUtils; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.iterator.LabeledSentenceProvider; +import org.nd4j.linalg.primitives.Pair; import java.util.*; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/FileLabeledSentenceProvider.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/FileLabeledSentenceProvider.java index 5b565507e036..6f3d5bda64b4 100644 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/FileLabeledSentenceProvider.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/FileLabeledSentenceProvider.java @@ -3,9 +3,9 @@ 
import lombok.NonNull; import org.apache.commons.io.FileUtils; import org.datavec.api.util.RandomUtils; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.iterator.LabeledSentenceProvider; import org.nd4j.linalg.collection.CompactHeapStringList; +import org.nd4j.linalg.primitives.Pair; import java.io.File; import java.io.IOException; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/LabelAwareConverter.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/LabelAwareConverter.java index 503f6bb07da1..2a1f0b2f54d7 100644 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/LabelAwareConverter.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/iterator/provider/LabelAwareConverter.java @@ -1,10 +1,10 @@ package org.deeplearning4j.iterator.provider; import lombok.NonNull; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.iterator.LabeledSentenceProvider; import org.deeplearning4j.text.documentiterator.LabelAwareIterator; import org.deeplearning4j.text.documentiterator.LabelledDocument; +import org.nd4j.linalg.primitives.Pair; import java.util.List; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/learning/impl/elements/GloVe.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/learning/impl/elements/GloVe.java index 4192ab53a3e6..b17f7777862c 100644 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/learning/impl/elements/GloVe.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/learning/impl/elements/GloVe.java @@ -1,8 +1,6 @@ package org.deeplearning4j.models.embeddings.learning.impl.elements; import lombok.NonNull; -import org.nd4j.linalg.primitives.Counter; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.embeddings.WeightLookupTable; import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; import org.deeplearning4j.models.embeddings.learning.ElementsLearningAlgorithm; @@ -15,6 +13,8 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.legacy.AdaGrad; +import org.nd4j.linalg.primitives.Counter; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializer.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializer.java index ed1b230bdbd1..8bcf200040c0 100755 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializer.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializer.java @@ -27,7 +27,6 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.io.LineIterator; import org.apache.commons.io.output.CloseShieldOutputStream; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.models.embeddings.WeightLookupTable; import 
org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; @@ -59,6 +58,7 @@ import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.shade.jackson.databind.DeserializationFeature; import org.nd4j.shade.jackson.databind.MapperFeature; import org.nd4j.shade.jackson.databind.ObjectMapper; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/BasicModelUtils.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/BasicModelUtils.java index 6d729ce5d88c..dc2354c67642 100644 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/BasicModelUtils.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/BasicModelUtils.java @@ -5,7 +5,6 @@ import lombok.Data; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; -import org.nd4j.linalg.primitives.Counter; import org.deeplearning4j.models.embeddings.WeightLookupTable; import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; import org.deeplearning4j.models.embeddings.reader.ModelUtils; @@ -16,6 +15,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Counter; import java.util.*; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/FlatModelUtils.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/FlatModelUtils.java index c3bac8891144..ae9a339613c4 100644 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/FlatModelUtils.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/reader/impl/FlatModelUtils.java @@ -1,9 +1,9 @@ package org.deeplearning4j.models.embeddings.reader.impl; -import org.nd4j.linalg.primitives.Counter; import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Counter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/AbstractCoOccurrences.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/AbstractCoOccurrences.java index 73bf2492a664..b04511f4be18 100644 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/AbstractCoOccurrences.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/AbstractCoOccurrences.java @@ -1,7 +1,6 @@ package org.deeplearning4j.models.glove; import lombok.NonNull; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.glove.count.*; import org.deeplearning4j.models.sequencevectors.interfaces.SequenceIterator; import org.deeplearning4j.models.sequencevectors.iterators.FilteredSequenceIterator; @@ -14,6 +13,7 @@ import org.deeplearning4j.text.sentenceiterator.SentenceIterator; import 
org.deeplearning4j.text.sentenceiterator.SynchronizedSentenceIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/count/CountMap.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/count/CountMap.java index 963714d25573..a723a30f3515 100644 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/count/CountMap.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/count/CountMap.java @@ -1,8 +1,8 @@ package org.deeplearning4j.models.glove.count; import com.google.common.util.concurrent.AtomicDouble; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement; +import org.nd4j.linalg.primitives.Pair; import java.util.Iterator; import java.util.Map; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/paragraphvectors/ParagraphVectors.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/paragraphvectors/ParagraphVectors.java index 764c6917eb84..3dd0674f7809 100644 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/paragraphvectors/ParagraphVectors.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/paragraphvectors/ParagraphVectors.java @@ -4,8 +4,6 @@ import lombok.Getter; import lombok.NonNull; import lombok.Setter; -import org.nd4j.linalg.primitives.Counter; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.embeddings.WeightLookupTable; import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; import org.deeplearning4j.models.embeddings.learning.ElementsLearningAlgorithm; @@ -34,6 +32,8 @@ import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Counter; +import org.nd4j.linalg.primitives.Pair; import java.util.*; import java.util.concurrent.*; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/word2vec/wordstore/inmemory/InMemoryLookupCache.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/word2vec/wordstore/inmemory/InMemoryLookupCache.java index 9722214d8b60..8774cb4e3e22 100755 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/word2vec/wordstore/inmemory/InMemoryLookupCache.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/word2vec/wordstore/inmemory/InMemoryLookupCache.java @@ -18,12 +18,12 @@ package org.deeplearning4j.models.word2vec.wordstore.inmemory; -import org.nd4j.linalg.primitives.Counter; import org.deeplearning4j.models.word2vec.VocabWord; import org.deeplearning4j.models.word2vec.wordstore.VocabCache; import org.deeplearning4j.text.movingwindow.Util; import org.deeplearning4j.util.Index; import org.deeplearning4j.util.SerializationUtils; +import org.nd4j.linalg.primitives.Counter; import java.io.File; import java.io.InputStream; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/invertedindex/InvertedIndex.java 
b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/invertedindex/InvertedIndex.java index 69015950ee6e..714bde9d55c6 100644 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/invertedindex/InvertedIndex.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/invertedindex/InvertedIndex.java @@ -19,8 +19,8 @@ package org.deeplearning4j.text.invertedindex; import com.google.common.base.Function; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement; +import org.nd4j.linalg.primitives.Pair; import java.io.Serializable; import java.util.Collection; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/ContextLabelRetriever.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/ContextLabelRetriever.java index 7255ec87cb72..bd65b87bfa44 100755 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/ContextLabelRetriever.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/ContextLabelRetriever.java @@ -18,11 +18,11 @@ package org.deeplearning4j.text.movingwindow; -import org.deeplearning4j.util.StringUtils; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.text.tokenization.tokenizer.Tokenizer; import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory; import org.deeplearning4j.util.MultiDimensionalMap; +import org.deeplearning4j.util.StringUtils; +import org.nd4j.linalg.primitives.Pair; import java.util.ArrayList; import java.util.List; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/Util.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/Util.java index 924051a47fa0..2c691e019e05 100755 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/Util.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/movingwindow/Util.java @@ -22,7 +22,6 @@ import org.nd4j.linalg.primitives.CounterMap; import java.util.List; -import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; diff --git a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/glove/AbstractCoOccurrencesTest.java b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/glove/AbstractCoOccurrencesTest.java index 5ac40a9d29f7..e18182bb2bff 100644 --- a/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/glove/AbstractCoOccurrencesTest.java +++ b/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/glove/AbstractCoOccurrencesTest.java @@ -1,7 +1,6 @@ package org.deeplearning4j.models.glove; import org.datavec.api.util.ClassPathResource; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.sequencevectors.iterators.AbstractSequenceIterator; import org.deeplearning4j.models.sequencevectors.transformers.impl.SentenceTransformer; import org.deeplearning4j.models.word2vec.VocabWord; @@ -13,6 +12,7 @@ import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory; import org.junit.Before; import org.junit.Test; +import org.nd4j.linalg.primitives.Pair; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIterator.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIterator.java index b6b4cbd12bc5..54677169a5d2 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIterator.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/AbstractDataSetIterator.java @@ -1,12 +1,12 @@ package org.deeplearning4j.datasets.iterator; import lombok.NonNull; -import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.DataSetPreProcessor; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.util.ArrayList; import java.util.Iterator; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/INDArrayDataSetIterator.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/INDArrayDataSetIterator.java index 2d4414e3cc7a..e4bbefafe6cf 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/INDArrayDataSetIterator.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/INDArrayDataSetIterator.java @@ -1,8 +1,8 @@ package org.deeplearning4j.datasets.iterator; import lombok.NonNull; -import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** * @author raver119@gmail.com diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/BaseEvaluation.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/BaseEvaluation.java index c7c7fbd2921c..c461a30d2092 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/BaseEvaluation.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/BaseEvaluation.java @@ -2,10 +2,10 @@ import lombok.EqualsAndHashCode; import lombok.Getter; +import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.primitives.AtomicBoolean; import org.nd4j.linalg.primitives.AtomicDouble; import org.nd4j.linalg.primitives.Pair; -import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.primitives.serde.JsonDeserializerAtomicBoolean; import org.nd4j.linalg.primitives.serde.JsonDeserializerAtomicDouble; import org.nd4j.linalg.primitives.serde.JsonSerializerAtomicBoolean; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/Evaluation.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/Evaluation.java index 414db173f4cf..69beee2c56e1 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/Evaluation.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/Evaluation.java @@ -22,8 +22,6 @@ import lombok.Getter; import lombok.Setter; import lombok.extern.slf4j.Slf4j; -import org.nd4j.linalg.primitives.Counter; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.eval.meta.Prediction; import org.deeplearning4j.eval.serde.ConfusionMatrixDeserializer; import org.deeplearning4j.eval.serde.ConfusionMatrixSerializer; @@ -37,6 +35,8 @@ import org.nd4j.linalg.indexing.conditions.Conditions; import org.nd4j.linalg.lossfunctions.serde.RowVectorDeserializer; import org.nd4j.linalg.lossfunctions.serde.RowVectorSerializer; +import org.nd4j.linalg.primitives.Counter; +import 
org.nd4j.linalg.primitives.Pair; import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; import org.nd4j.shade.jackson.databind.annotation.JsonDeserialize; import org.nd4j.shade.jackson.databind.annotation.JsonSerialize; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/EvaluationUtils.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/EvaluationUtils.java index 1ba2c13e727c..d4cae93bd7e3 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/EvaluationUtils.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/EvaluationUtils.java @@ -1,9 +1,9 @@ package org.deeplearning4j.eval; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.util.TimeSeriesUtils; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/ROC.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/ROC.java index c848c2468ea1..7f46be8b5954 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/ROC.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/ROC.java @@ -2,7 +2,6 @@ import lombok.*; import org.apache.commons.lang3.ArrayUtils; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.eval.curves.PrecisionRecallCurve; import org.deeplearning4j.eval.curves.RocCurve; import org.deeplearning4j.eval.serde.ROCSerializer; @@ -15,6 +14,7 @@ import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.indexing.conditions.Condition; import org.nd4j.linalg.indexing.conditions.Conditions; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; import org.nd4j.shade.jackson.annotation.JsonTypeInfo; import org.nd4j.shade.jackson.databind.annotation.JsonSerialize; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java index 5ac6ddb7115c..a0fe0778c2c6 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java @@ -31,6 +31,7 @@ import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.impl.LossMCXENT; +import org.nd4j.linalg.primitives.Pair; import java.util.ArrayList; import java.util.Arrays; @@ -133,7 +134,7 @@ public static boolean checkGradients(MultiLayerNetwork mln, double epsilon, doub IUpdater u = bl.getIUpdater(); if (u instanceof Sgd) { //Must have LR of 1.0 - double lr = bl.getLearningRate(); + double lr = ((Sgd) u).getLearningRate(); if (lr != 1.0) { throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer " + layerCount + "; got " + u + " with lr=" + lr + " for layer \"" @@ -155,10 +156,9 @@ public static boolean checkGradients(MultiLayerNetwork mln, double epsilon, doub } } - double dropout = n.getLayer().getDropOut(); - if (dropout != 0.0) { - throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = " - + dropout + " for layer " + layerCount); + if (n.getLayer().getIDropout() != null) { + throw new IllegalStateException("Must have no dropout for gradient checks - got dropout = " + + n.getLayer().getIDropout() + " for layer " + layerCount); } } @@ -175,7 +175,7 @@ public 
static boolean checkGradients(MultiLayerNetwork mln, double epsilon, doub Pair gradAndScore = mln.gradientAndScore(); Updater updater = UpdaterCreator.getUpdater(mln); - updater.update(mln, gradAndScore.getFirst(), 0, mln.batchSize()); + updater.update(mln, gradAndScore.getFirst(), 0, 0, mln.batchSize()); INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup(); //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done) INDArray originalParams = mln.params().dup(); //need dup: params are a *view* of full parameters @@ -313,7 +313,7 @@ public static boolean checkGradients(ComputationGraph graph, double epsilon, dou IUpdater u = bl.getIUpdater(); if (u instanceof Sgd) { //Must have LR of 1.0 - double lr = bl.getLearningRate(); + double lr = ((Sgd) u).getLearningRate(); if (lr != 1.0) { throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer " + layerCount + "; got " + u + " with lr=" + lr + " for layer \"" @@ -335,10 +335,9 @@ public static boolean checkGradients(ComputationGraph graph, double epsilon, dou } } - double dropout = lv.getLayerConf().getLayer().getDropOut(); - if (dropout != 0.0) { - throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = " - + dropout + " for layer " + layerCount); + if (lv.getLayerConf().getLayer().getIDropout() != null) { + throw new IllegalStateException("Must have no dropout for gradient checks - got dropout = " + + lv.getLayerConf().getLayer().getIDropout() + " for layer " + layerCount); } } @@ -358,7 +357,7 @@ public static boolean checkGradients(ComputationGraph graph, double epsilon, dou Pair gradAndScore = graph.gradientAndScore(); ComputationGraphUpdater updater = new ComputationGraphUpdater(graph); - updater.update(gradAndScore.getFirst(), 0, graph.batchSize()); + updater.update(gradAndScore.getFirst(), 0, 0, graph.batchSize()); INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup(); //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done) INDArray originalParams = graph.params().dup(); //need dup: params are a *view* of full parameters @@ -474,7 +473,7 @@ public static boolean checkGradientsPretrainLayer(Layer layer, double epsilon, d Pair gradAndScore = layer.gradientAndScore(); Updater updater = UpdaterCreator.getUpdater(layer); - updater.update(layer, gradAndScore.getFirst(), 0, layer.batchSize()); + updater.update(layer, gradAndScore.getFirst(), 0, 0, layer.batchSize()); INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup(); //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done) INDArray originalParams = layer.params().dup(); //need dup: params are a *view* of full parameters diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java index 494afa7ba6ce..0ce811102823 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java @@ -19,11 +19,11 @@ package org.deeplearning4j.nn.api; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.optimize.api.IterationListener; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.io.Serializable; import 
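The GradientCheckUtil changes above swap the removed getLearningRate()/getDropOut() checks for their IUpdater/IDropout equivalents: a gradient check now refuses to run if any layer has an IDropout set, and if SGD is used its learning rate must be 1.0. The following is a minimal, hypothetical sketch of a configuration that satisfies those preconditions (layer sizes, data shapes and tolerance values are illustrative, not taken from this patch; the usual DL4J/ND4J imports are assumed):

    // No dropout anywhere, and Sgd with learning rate 1.0, as checkGradients now requires
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(new Sgd(1.0))
            .list()
            .layer(new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH).build())
            .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX).nIn(3).nOut(2).build())
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    INDArray input = Nd4j.rand(3, 4);                                        // arbitrary minibatch of 3 examples
    INDArray labels = Nd4j.create(new double[][]{{1, 0}, {0, 1}, {1, 0}});   // one-hot labels for 2 classes
    // Argument order (epsilon, maxRelError, minAbsError, print, exitOnFirstError, input, labels) follows
    // common DL4J test usage and is an assumption here, not something this patch changes.
    boolean gradOK = GradientCheckUtil.checkGradients(net, 1e-6, 1e-3, 1e-8, true, false, input, labels);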
java.util.Collection; @@ -77,45 +77,6 @@ enum TrainingMode { */ Type type(); - /** - * Calculate error with respect to the - * current layer. - * - * This gradient will contain the error signal - * @param input the gradient for the forward layer - * If this is the final layer, it will start - * with the error from the output. - * This is on the user to initialize. - * @return the gradient wrt the parameters - * on the current layer - * @deprecated As of 0.7.3 - Feb 2017. No longer used. - */ - @Deprecated - Gradient error(INDArray input); - - - - /** - * Take the derivative of the given input - * based on the activation - * @param input the input to take the derivative of - * @return the derivative of the action - * @deprecated As of 0.7.3 - Feb 2017. No longer used. - */ - @Deprecated - INDArray derivativeActivation(INDArray input); - - - /** - * Calculate the gradient - * @param layerError the layer error - * @param indArray - * @return the gradient - * @deprecated As of 0.7.3 - Feb 2017. No longer used. - */ - @Deprecated - Gradient calcGradient(Gradient layerError, INDArray indArray); - /**Calculate the gradient relative to the error in the next layer * @param epsilon w^(L+1)*delta^(L+1). Or, equiv: dC/da, i.e., (dC/dz)*(dz/da) = dC/da, where C @@ -126,26 +87,6 @@ enum TrainingMode { */ Pair backpropGradient(INDArray epsilon); - - /** - * Parameter averaging - * @param layer the layer to merge - * @param batchSize the batch size to merge on - * @deprecated As of 0.7.3 - Feb 2017. No longer used. Merging (for parameter averaging) done via alternative means - */ - @Deprecated - void merge(Layer layer, int batchSize); - - - /** - * Calculate the mean representation - * for the activation for this layer - * @return the activation mean for this layer - * @deprecated As of 0.7.3 - Feb 2017. No longer used. - */ - @Deprecated - INDArray activationMean(); - /** * Raw activations * @param x the input to transform @@ -230,6 +171,7 @@ enum TrainingMode { * * @return the transposed layer */ + @Deprecated Layer transpose(); /** @@ -265,6 +207,26 @@ enum TrainingMode { */ int getIndex(); + /** + * @return The current iteration count (number of parameter updates) for the layer/network + */ + int getIterationCount(); + + /** + * @return The current epoch count (number of training epochs passed) for the layer/network + */ + int getEpochCount(); + + /** + * Set the current iteration count (number of parameter updates) for the layer/network + */ + void setIterationCount(int iterationCount); + + /** + * Set the current epoch count (number of epochs passed ) for the layer/network + */ + void setEpochCount(int epochCount); + /** * Get the layer input. */ @@ -300,6 +262,9 @@ enum TrainingMode { boolean isPretrainLayer(); + void clearNoiseWeightParams(); + + /** * Feed forward the input mask array, setting in in the layer as appropriate. 
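The Layer interface now exposes the training-progress counters added above (getIterationCount()/getEpochCount() and their setters) along with clearNoiseWeightParams(). A hedged sketch of how the counters might be used follows; it assumes that MultiLayerNetwork, as a Layer implementation, inherits these methods and that fitting advances the iteration count internally (conf and trainIter are placeholders defined elsewhere):

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    for (int epoch = 0; epoch < 10; epoch++) {
        net.setEpochCount(epoch);          // make the current epoch visible to epoch-dependent components
        net.fit(trainIter);                // parameter updates during fitting bump the iteration count
        System.out.println("epoch=" + net.getEpochCount() + ", iterations=" + net.getIterationCount());
    }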
This allows different layers to * handle masks differently - for example, bidirectional RNNs and normal RNNs operate differently with masks (the diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Model.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Model.java index 954bf2a72d29..b8f64ce3926a 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Model.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Model.java @@ -18,12 +18,12 @@ package org.deeplearning4j.nn.api; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.deeplearning4j.optimize.api.IterationListener; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.util.Collection; import java.util.Map; @@ -144,14 +144,6 @@ public interface Model { */ void setBackpropGradientsViewArray(INDArray gradients); - /** - * Update learningRate using for this model. - * Use the learningRateScoreBasedDecay to adapt the score - * if the Eps termination condition is met - */ - void applyLearningRateScoreDecay(); - - /** * Fit the model to the given data * @param data the data to fit the model to diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/NeuralNetworkPrototype.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/NeuralNetworkPrototype.java deleted file mode 100644 index 4cfc90796b4d..000000000000 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/NeuralNetworkPrototype.java +++ /dev/null @@ -1,70 +0,0 @@ -package org.deeplearning4j.nn.api; - -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.dataset.api.DataSet; -import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; - -import java.util.Map; - -/** - * - * @author Alex Black - * @author raver119@gmail.com - */ -public interface NeuralNetworkPrototype { - /* - Model params section - */ - INDArray getParams(); - - Updater getUpdater(); - - double getScore(); - - T getConfiguration(); - - /* - Layers section - */ - // however, we can replicate to actual structure - Layer[] getLayers(); - - - /* - Fitting section - */ - // we should have unified dataset here - void fit(DataSet dataSet); - - // should be unified iterator too - void fit(DataSetIterator iterator); - - // same, iterator unification would be nice to see here - void pretrain(DataSetIterator iterator); - - - /* - Output section - */ - Map activations(INDArray input); - - INDArray output(INDArray input); - - INDArray[] output(INDArray... input); - - - /* - RNN section - */ - void rnnClearPreviousState(); - - Map> rnnGetPreviousStates(); - - void rnnTimeStep(INDArray... input); - - - /* - Evaluation section - */ - // why exactly we have Evaluation class AND evaluation code in MLN/CG at the same time? 
-} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java index a4edac1632ea..dc7b3c1df63d 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java @@ -33,5 +33,5 @@ public interface Updater extends Serializable { * @param gradient * @param iteration */ - void update(Layer layer, Gradient gradient, int iteration, int miniBatchSize); + void update(Layer layer, Gradient gradient, int iteration, int epoch, int miniBatchSize); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java index 11d0fc5eb8cb..0b515e8fe004 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java @@ -1,11 +1,9 @@ package org.deeplearning4j.nn.api.layers; -import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.api.Layer; import org.nd4j.shade.jackson.annotation.JsonTypeInfo; import java.io.Serializable; -import java.util.List; import java.util.Set; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java index 33310ef6c049..4a5426e29742 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java @@ -16,10 +16,10 @@ package org.deeplearning4j.nn.api.layers; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.util.Map; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java index 4864e8076917..b714eb67cf70 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java @@ -639,6 +639,19 @@ public GraphBuilder addLayer(String layerName, Layer layer, String... layerInput return addLayer(layerName, layer, null, layerInputs); } + /** + * Add a layer, with no {@link InputPreProcessor}, with the specified name and specified inputs. + * + * @param layerName Name/label of the layer to add + * @param layer The layer configuration + * @param layerInputs Inputs to this layer (must be 1 or more). Inputs may be other layers, GraphVertex objects, + * on a combination of the two. + * @see #addLayer(String, Layer, InputPreProcessor, String...) + */ + public GraphBuilder layer(String layerName, Layer layer, String... layerInputs) { + return addLayer(layerName, layer, null, layerInputs); + } + /** * Add a layer and an {@link InputPreProcessor}, with the specified name and specified inputs. 
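The new GraphBuilder.layer(...) convenience overload above (and the preprocessor variant added just below) simply delegates to addLayer(...) with the same arguments. A short, hypothetical sketch; the vertex names and layer sizes are invented for illustration:

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(new Adam(1e-3))
            .graphBuilder()
            .addInputs("in")
            .layer("dense", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")   // same as addLayer(...)
            .layer("out", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX).nIn(10).nOut(3).build(), "dense")
            .setOutputs("out")
            .build();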
* @@ -657,6 +670,20 @@ public GraphBuilder addLayer(String layerName, Layer layer, InputPreProcessor pr return this; } + /** + * Add a layer and an {@link InputPreProcessor}, with the specified name and specified inputs. + * + * @param layerName Name/label of the layer to add + * @param layer The layer configuration + * @param preProcessor The InputPreProcessor to use with this layer. + * @param layerInputs Inputs to this layer (must be 1 or more). Inputs may be other layers, GraphVertex objects, + * on a combination of the two. + */ + public GraphBuilder layer(String layerName, Layer layer, InputPreProcessor preProcessor, + String... layerInputs) { + return addLayer(layerName, layer, preProcessor, layerInputs); + } + /** * Intended for use with the transfer learning API. Users discouraged from employing it directly. * Removes the specified vertex from the vertices list, it's connections and associated preprocessor diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/InputPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/InputPreProcessor.java index 061af838be3c..1647fac39550 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/InputPreProcessor.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/InputPreProcessor.java @@ -19,11 +19,11 @@ package org.deeplearning4j.nn.conf; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.preprocessor.*; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.shade.jackson.annotation.JsonSubTypes; import org.nd4j.shade.jackson.annotation.JsonTypeInfo; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java index 488552a40c29..28b56acde9ae 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java @@ -26,8 +26,9 @@ import org.apache.commons.lang3.ClassUtils; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.api.layers.LayerConstraint; -import org.deeplearning4j.nn.conf.constraint.BaseConstraint; import org.deeplearning4j.nn.conf.distribution.Distribution; +import org.deeplearning4j.nn.conf.dropout.Dropout; +import org.deeplearning4j.nn.conf.dropout.IDropout; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; @@ -36,13 +37,15 @@ import org.deeplearning4j.nn.conf.serde.ComputationGraphConfigurationDeserializer; import org.deeplearning4j.nn.conf.serde.MultiLayerConfigurationDeserializer; import org.deeplearning4j.nn.conf.stepfunctions.StepFunction; +import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.util.reflections.DL4JSubTypesScanner; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationSigmoid; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.learning.config.*; +import org.nd4j.linalg.learning.config.IUpdater; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.ILossFunction; import 
org.nd4j.shade.jackson.databind.*; import org.nd4j.shade.jackson.databind.deser.BeanDeserializerModifier; @@ -85,8 +88,6 @@ public class NeuralNetConfiguration implements Serializable, Cloneable { public static final String CUSTOM_FUNCTIONALITY = "org.deeplearning4j.config.custom.enabled"; protected Layer layer; - @Deprecated - protected double leakyreluAlpha; //batch size: primarily used for conv nets. Will be reinforced if set. protected boolean miniBatch = true; protected int numIterations; @@ -97,19 +98,11 @@ public class NeuralNetConfiguration implements Serializable, Cloneable { //gradient keys used for ensuring order when getting and setting the gradient protected List variables = new ArrayList<>(); //whether to constrain the gradient to unit norm or not - //adadelta - weight for how much to consider previous history protected StepFunction stepFunction; - protected boolean useDropConnect = false; //minimize or maximize objective protected boolean minimize = true; - // Graves LSTM & RNN - protected Map learningRateByParam = new HashMap<>(); protected Map l1ByParam = new HashMap<>(); protected Map l2ByParam = new HashMap<>(); - protected LearningRatePolicy learningRatePolicy = LearningRatePolicy.None; - protected double lrPolicyDecayRate; - protected double lrPolicySteps; - protected double lrPolicyPower; protected boolean pretrain; // this field defines preOutput cache @@ -143,8 +136,6 @@ public NeuralNetConfiguration clone() { clone.stepFunction = clone.stepFunction.clone(); if (clone.variables != null) clone.variables = new ArrayList<>(clone.variables); - if (clone.learningRateByParam != null) - clone.learningRateByParam = new HashMap<>(clone.learningRateByParam); if (clone.l1ByParam != null) clone.l1ByParam = new HashMap<>(clone.l1ByParam); if (clone.l2ByParam != null) @@ -176,7 +167,6 @@ public void clearVariables() { variables.clear(); l1ByParam.clear(); l2ByParam.clear(); - learningRateByParam.clear(); } public void resetVariables() { @@ -186,26 +176,16 @@ public void resetVariables() { } public void setLayerParamLR(String variable) { - double lr = layer.getLearningRateByParam(variable); double l1 = layer.getL1ByParam(variable); if (Double.isNaN(l1)) l1 = 0.0; //Not set double l2 = layer.getL2ByParam(variable); if (Double.isNaN(l2)) l2 = 0.0; //Not set - learningRateByParam.put(variable, lr); l1ByParam.put(variable, l1); l2ByParam.put(variable, l2); } - public double getLearningRateByParam(String variable) { - return learningRateByParam.get(variable); - } - - public void setLearningRateByParam(String variable, double rate) { - learningRateByParam.put(variable, rate); - } - public double getL1ByParam(String variable) { return l1ByParam.get(variable); } @@ -592,49 +572,24 @@ public static class Builder implements Cloneable { protected WeightInit weightInit = WeightInit.XAVIER; protected double biasInit = 0.0; protected Distribution dist = null; - protected double learningRate = 1e-1; - protected double biasLearningRate = Double.NaN; - protected Map learningRateSchedule = null; - protected double lrScoreBasedDecay; protected double l1 = Double.NaN; protected double l2 = Double.NaN; protected double l1Bias = Double.NaN; protected double l2Bias = Double.NaN; - protected double dropOut = 0; - @Deprecated - protected Updater updater = Updater.SGD; + protected IDropout idropOut; + protected IWeightNoise weightNoise; protected IUpdater iUpdater = new Sgd(); - @Deprecated - protected double momentum = Double.NaN; - @Deprecated - protected Map momentumSchedule = null; - @Deprecated - 
protected double epsilon = Double.NaN; - @Deprecated - protected double rho = Double.NaN; - @Deprecated - protected double rmsDecay = Double.NaN; - @Deprecated - protected double adamMeanDecay = Double.NaN; - @Deprecated - protected double adamVarDecay = Double.NaN; + protected IUpdater biasUpdater = null; protected Layer layer; - @Deprecated - protected double leakyreluAlpha = 0.01; protected boolean miniBatch = true; protected int numIterations = 1; protected int maxNumLineSearchIterations = 5; protected long seed = System.currentTimeMillis(); protected OptimizationAlgorithm optimizationAlgo = OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT; protected StepFunction stepFunction = null; - protected boolean useDropConnect = false; protected boolean minimize = true; protected GradientNormalization gradientNormalization = GradientNormalization.None; protected double gradientNormalizationThreshold = 1.0; - protected LearningRatePolicy learningRatePolicy = LearningRatePolicy.None; - protected double lrPolicyDecayRate = Double.NaN; - protected double lrPolicySteps = Double.NaN; - protected double lrPolicyPower = Double.NaN; protected boolean pretrain = false; protected List allParamConstraints; protected List weightConstraints; @@ -659,12 +614,7 @@ public Builder(NeuralNetConfiguration newConf) { optimizationAlgo = newConf.optimizationAlgo; seed = newConf.seed; stepFunction = newConf.stepFunction; - useDropConnect = newConf.useDropConnect; miniBatch = newConf.miniBatch; - learningRatePolicy = newConf.learningRatePolicy; - lrPolicyDecayRate = newConf.lrPolicyDecayRate; - lrPolicySteps = newConf.lrPolicySteps; - lrPolicyPower = newConf.lrPolicyPower; pretrain = newConf.pretrain; } } @@ -720,18 +670,6 @@ public Builder cacheMode(@NonNull CacheMode cacheMode) { return this; } - /** - * Use drop connect: multiply the weight by a binomial sampling wrt the dropout probability. - * Dropconnect probability is set using {@link #dropOut(double)}; this is the probability of retaining a weight - * - * @param useDropConnect whether to use drop connect or not - * @return the - */ - public Builder useDropConnect(boolean useDropConnect) { - this.useDropConnect = useDropConnect; - return this; - } - /** * Objective function to minimize or maximize cost function * Default set to minimize true. @@ -769,6 +707,7 @@ public Builder layer(Layer layer) { * Options: DefaultStepFunction (default), NegativeDefaultStepFunction * GradientStepFunction (for SGD), NegativeGradientStepFunction */ + @Deprecated public Builder stepFunction(StepFunction stepFunction) { this.stepFunction = stepFunction; return this; @@ -779,9 +718,9 @@ public Builder stepFunction(StepFunction stepFunction) { * Usage:
*
          * {@code .list()
-         * .layer(0,new DenseLayer.Builder()...build())
+         * .layer(new DenseLayer.Builder()...build())
          * ...
-         * .layer(n,new OutputLayer.Builder()...build())
+         * .layer(new OutputLayer.Builder()...build())
          * }
          * 
*/ @@ -823,22 +762,14 @@ public ComputationGraphConfiguration.GraphBuilder graphBuilder() { } /** - * Number of optimization iterations. + * Number of optimization iterations. Should be set to 1 for >99% of use cases (possible exception: + * very tiny full batch dataset training) */ public Builder iterations(int numIterations) { this.numIterations = numIterations; return this; } - /** - * Random number generator seed. Used for reproducability between runs - */ - public Builder seed(int seed) { - this.seed = (long) seed; - Nd4j.getRandom().setSeed(seed); - return this; - } - /** * Random number generator seed. Used for reproducability between runs */ @@ -858,14 +789,6 @@ public Builder optimizationAlgo(OptimizationAlgorithm optimizationAlgo) { return this; } - /** - * @deprecated Now: no-op. Regularization is always used when l1/l2/dropout is > 0 - */ - @Deprecated - public Builder regularization(boolean useRegularization) { - return this; - } - @Override public Builder clone() { try { @@ -882,20 +805,6 @@ public Builder clone() { } } - /** - * Activation function / neuron non-linearity - * Typical values include:
- * "relu" (rectified linear), "tanh", "sigmoid", "softmax", - * "hardtanh", "leakyrelu", "maxout", "softsign", "softplus" - * - * @deprecated Use {@link #activation(Activation)} or - * {@link @activation(IActivation)} - */ - @Deprecated - public Builder activation(String activationFunction) { - return activation(Activation.fromString(activationFunction).getActivationFunction()); - } - /** * Activation function / neuron non-linearity * @@ -913,15 +822,6 @@ public Builder activation(Activation activation) { return activation(activation.getActivationFunction()); } - /** - * @deprecated Use {@link #activation(IActivation)} with leaky relu, setting alpha value directly in constructor. - */ - @Deprecated - public Builder leakyreluAlpha(double leakyreluAlpha) { - this.leakyreluAlpha = leakyreluAlpha; - return this; - } - /** * Weight initialization scheme. * @@ -951,39 +851,6 @@ public Builder dist(Distribution dist) { return this; } - /** - * Learning rate. Defaults to 1e-1 - */ - public Builder learningRate(double learningRate) { - this.learningRate = learningRate; - return this; - } - - /** - * Bias learning rate. Set this to apply a different learning rate to the bias - */ - public Builder biasLearningRate(double biasLearningRate) { - this.biasLearningRate = biasLearningRate; - return this; - } - - /** - * Learning rate schedule. Map of the iteration to the learning rate to apply at that iteration. - */ - public Builder learningRateSchedule(Map learningRateSchedule) { - this.learningRateSchedule = learningRateSchedule; - return this; - } - - /** - * Rate to decrease learningRate by when the score stops improving. - * Learning rate is multiplied by this rate so ideally keep between 0 and 1. - */ - public Builder learningRateScoreBasedDecayRate(double lrScoreBasedDecay) { - this.lrScoreBasedDecay = lrScoreBasedDecay; - return this; - } - /** * L1 regularization coefficient for the weights. */ @@ -1022,8 +889,6 @@ public Builder l2Bias(double l2Bias) { * dropOut(0.0) is a special value / special case - when set to 0.0., dropout is disabled (not applied). Note * that a dropout value of 1.0 is functionally equivalent to no dropout: i.e., 100% probability of retaining * each input activation.
-         * When {@link #useDropConnect(boolean)} is set to true (false by default), this method sets the drop connect
-         * probability instead.
          *
          * Note 1: Dropout is applied at training time only - and is automatically not applied at test time
          * (for evaluation, etc)
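As a usage illustration of the retain-probability semantics described above, here is a minimal sketch that sets a network-wide dropout value on the builder; it assumes the index-free .layer(...) overload shown in the javadoc earlier, and the 0.8 value is purely illustrative:

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class GlobalDropoutSketch {
    public static void main(String[] args) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .dropOut(0.8)   // retain each input activation with probability 0.8 at training time
                .list()
                .layer(new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.RELU).build())
                .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .nIn(10).nOut(3).activation(Activation.SOFTMAX).build())
                .build();
        System.out.println(conf.toJson());
    }
}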
@@ -1037,139 +902,69 @@ public Builder l2Bias(double l2Bias) {
          *
* * @param inputRetainProbability Dropout probability (probability of retaining each input activation value for a layer) + * @see #dropOut(IDropout) */ public Builder dropOut(double inputRetainProbability) { - this.dropOut = inputRetainProbability; - return this; + return dropOut(new Dropout(inputRetainProbability)); } /** - * Momentum rate - * Used only when Updater is set to {@link Updater#NESTEROVS} + * Set the dropout for all layers in this network * - * @deprecated Use {@code .updater(new Nesterov(momentum))} instead + * @param dropout Dropout, such as {@link Dropout}, {@link org.deeplearning4j.nn.conf.dropout.GaussianDropout}, + * {@link org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc + * @return */ - @Deprecated - public Builder momentum(double momentum) { - this.momentum = momentum; + public Builder dropOut(IDropout dropout){ + this.idropOut = dropout; return this; } /** - * Momentum schedule. Map of the iteration to the momentum rate to apply at that iteration - * Used only when Updater is set to {@link Updater#NESTEROVS} + * Set the weight noise (such as {@link org.deeplearning4j.nn.conf.weightnoise.DropConnect} and + * {@link org.deeplearning4j.nn.conf.weightnoise.WeightNoise}) for the layers in this network. * - * @deprecated Use {@code .updater(Nesterov.builder().momentumSchedule(schedule).build())} instead + * @param weightNoise Weight noise instance to use */ - @Deprecated - public Builder momentumAfter(Map momentumAfter) { - this.momentumSchedule = momentumAfter; + public Builder weightNoise(IWeightNoise weightNoise){ + this.weightNoise = weightNoise; return this; } + /** - * Gradient updater. For example, Updater.SGD for standard stochastic gradient descent, - * Updater.NESTEROV for Nesterov momentum, Updater.RSMPROP for RMSProp, etc.
- * Note: default hyperparameters are used with this method. Use {@link #updater(IUpdater)} to configure - * the updater-specific hyperparameters. - * - * @see Updater + * @deprecated Use {@link #updater(IUpdater)} */ + @Deprecated public Builder updater(Updater updater) { - this.updater = updater; return updater(updater.getIUpdaterWithDefaultConfig()); } /** - * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} + * Gradient updater configuration. For example, {@link org.nd4j.linalg.learning.config.Adam} * or {@link org.nd4j.linalg.learning.config.Nesterovs} * * @param updater Updater to use */ public Builder updater(IUpdater updater) { - //Ensure legacy field is set... - if (updater instanceof Sgd) - this.updater = Updater.SGD; - else if (updater instanceof Adam) - this.updater = Updater.ADAM; - else if (updater instanceof AdaMax) - this.updater = Updater.ADAMAX; - else if (updater instanceof AdaDelta) - this.updater = Updater.ADADELTA; - else if (updater instanceof Nesterovs) - this.updater = Updater.NESTEROVS; - else if (updater instanceof Nadam) - this.updater = Updater.NADAM; - else if (updater instanceof AdaGrad) - this.updater = Updater.ADAGRAD; - else if (updater instanceof RmsProp) - this.updater = Updater.RMSPROP; - else if (updater instanceof NoOp) - this.updater = Updater.NONE; this.iUpdater = updater; return this; } /** - * Ada delta coefficient - * - * @param rho - * @deprecated use {@code .updater(new AdaDelta(rho,epsilon))} intead - */ - @Deprecated - public Builder rho(double rho) { - this.rho = rho; - return this; - } - - - /** - * Epsilon value for updaters: Adam, RMSProp, Adagrad, Adadelta - * - * @param epsilon Epsilon value to use for adagrad or - * @deprecated Use use {@code .updater(Adam.builder().epsilon(epsilon).build())} or similar instead - */ - @Deprecated - public Builder epsilon(double epsilon) { - this.epsilon = epsilon; - return this; - } - - /** - * Decay rate for RMSProp. Only applies if using .updater(Updater.RMSPROP) - * - * @deprecated use {@code .updater(new RmsProp(rmsDecay))} intead - */ - @Deprecated - public Builder rmsDecay(double rmsDecay) { - this.rmsDecay = rmsDecay; - return this; - } - - /** - * Mean decay rate for Adam updater. Only applies if using .updater(Updater.ADAM) - * - * @deprecated use {@code .updater(Adam.builder().beta1(adamMeanDecay).build())} intead - */ - @Deprecated - public Builder adamMeanDecay(double adamMeanDecay) { - this.adamMeanDecay = adamMeanDecay; - return this; - } - - /** - * Variance decay rate for Adam updater. Only applies if using .updater(Updater.ADAM) + * Gradient updater configuration, for the biases only. If not set, biases will use the updater as + * set by {@link #updater(IUpdater)} * - * @deprecated use {@code .updater(Adam.builder().beta2(adamVarDecay).build())} intead + * @param updater Updater to use for bias parameters */ - @Deprecated - public Builder adamVarDecay(double adamVarDecay) { - this.adamVarDecay = adamVarDecay; + public Builder biasUpdater(IUpdater updater){ + this.biasUpdater = updater; return this; } /** * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping etc. + * See {@link GradientNormalization} for details * * @param gradientNormalization Type of normalization to use. Defaults to None. * @see GradientNormalization @@ -1190,46 +985,6 @@ public Builder gradientNormalizationThreshold(double threshold) { return this; } - /** - * Learning rate decay policy. Used to adapt learning rate based on policy. 
- * - * @param policy Type of policy to use. Defaults to None. - */ - public Builder learningRateDecayPolicy(LearningRatePolicy policy) { - this.learningRatePolicy = policy; - return this; - } - - /** - * Set the decay rate for the learning rate decay policy. - * - * @param lrPolicyDecayRate rate. - */ - public Builder lrPolicyDecayRate(double lrPolicyDecayRate) { - this.lrPolicyDecayRate = lrPolicyDecayRate; - return this; - } - - /** - * Set the number of steps used for learning decay rate steps policy. - * - * @param lrPolicySteps number of steps - */ - public Builder lrPolicySteps(double lrPolicySteps) { - this.lrPolicySteps = lrPolicySteps; - return this; - } - - /** - * Set the power used for learning rate inverse policy. - * - * @param lrPolicyPower power - */ - public Builder lrPolicyPower(double lrPolicyPower) { - this.lrPolicyPower = lrPolicyPower; - return this; - } - /** * Sets the convolution mode for convolutional layers, which impacts padding and output sizes. * See {@link ConvolutionMode} for details. Defaults to ConvolutionMode.TRUNCATE @@ -1276,53 +1031,6 @@ public Builder constrainWeights(LayerConstraint... constraints) { return this; } - private void learningRateValidation(String layerName) { - if (learningRatePolicy != LearningRatePolicy.None && Double.isNaN(lrPolicyDecayRate)) { - //LR policy, if used, should have a decay rate. 2 exceptions: Map for schedule, and Poly + power param - if (!(learningRatePolicy == LearningRatePolicy.Schedule && learningRateSchedule != null) - && !(learningRatePolicy == LearningRatePolicy.Poly && !Double.isNaN(lrPolicyPower))) - throw new IllegalStateException("Layer \"" + layerName - + "\" learning rate policy decay rate (lrPolicyDecayRate) must be set to use learningRatePolicy."); - } - switch (learningRatePolicy) { - case Inverse: - case Poly: - if (Double.isNaN(lrPolicyPower)) - throw new IllegalStateException("Layer \"" + layerName - + "\" learning rate policy power (lrPolicyPower) must be set to use " - + learningRatePolicy); - break; - case Step: - case Sigmoid: - if (Double.isNaN(lrPolicySteps)) - throw new IllegalStateException("Layer \"" + layerName - + "\" learning rate policy steps (lrPolicySteps) must be set to use " - + learningRatePolicy); - break; - case Schedule: - if (learningRateSchedule == null) - throw new IllegalStateException("Layer \"" + layerName - + "\" learning rate policy schedule (learningRateSchedule) must be set to use " - + learningRatePolicy); - break; - } - - if (!Double.isNaN(lrPolicyPower) && (learningRatePolicy != LearningRatePolicy.Inverse - && learningRatePolicy != LearningRatePolicy.Poly)) - throw new IllegalStateException("Layer \"" + layerName - + "\" power has been set but will not be applied unless the learning rate policy is set to Inverse or Poly."); - if (!Double.isNaN(lrPolicySteps) && (learningRatePolicy != LearningRatePolicy.Step - && learningRatePolicy != LearningRatePolicy.Sigmoid - && learningRatePolicy != LearningRatePolicy.TorchStep)) - throw new IllegalStateException("Layer \"" + layerName - + "\" steps have been set but will not be applied unless the learning rate policy is set to Step or Sigmoid."); - if ((learningRateSchedule != null) && (learningRatePolicy != LearningRatePolicy.Schedule)) - throw new IllegalStateException("Layer \"" + layerName - + "\" learning rate schedule has been set but will not be applied unless the learning rate policy is set to Schedule."); - - } - //////////////// - /** * Return a configuration based on this builder * @@ -1338,12 +1046,7 @@ public 
NeuralNetConfiguration build() { conf.optimizationAlgo = optimizationAlgo; conf.seed = seed; conf.stepFunction = stepFunction; - conf.useDropConnect = useDropConnect; conf.miniBatch = miniBatch; - conf.learningRatePolicy = learningRatePolicy; - conf.lrPolicyDecayRate = lrPolicyDecayRate; - conf.lrPolicySteps = lrPolicySteps; - conf.lrPolicyPower = lrPolicyPower; conf.pretrain = pretrain; conf.cacheMode = this.cacheMode; @@ -1361,7 +1064,6 @@ private void configureLayer(Layer layer) { layerName = "Layer not named"; else layerName = layer.getLayerName(); - learningRateValidation(layerName); if (layer != null) { copyConfigToLayer(layerName, layer); @@ -1383,32 +1085,17 @@ private void configureLayer(Layer layer) { sl.setConvolutionMode(convolutionMode); } } - LayerValidation.generalValidation(layerName, layer, useDropConnect, dropOut, l2, l2Bias, - l1, l1Bias, dist, allParamConstraints, weightConstraints, biasConstraints); + LayerValidation.generalValidation(layerName, layer, idropOut, l2, l2Bias, l1, l1Bias, dist, + allParamConstraints, weightConstraints, biasConstraints); } private void copyConfigToLayer(String layerName, Layer layer) { - if (Double.isNaN(layer.getDropOut())) - layer.setDropOut(dropOut); + if (layer.getIDropout() == null) + layer.setIDropout(idropOut); if (layer instanceof BaseLayer) { BaseLayer bLayer = (BaseLayer) layer; - if (Double.isNaN(bLayer.getLearningRate())) - bLayer.setLearningRate(learningRate); - if (Double.isNaN(bLayer.getBiasLearningRate())) { - //Two possibilities when bias LR isn't set for layer: - // (a) If global bias LR *is* set -> set it to that - // (b) Otherwise, set to layer LR (and, by extension, the global LR) - if (!Double.isNaN(biasLearningRate)) { - //Global bias LR is set - bLayer.setBiasLearningRate(biasLearningRate); - } else { - bLayer.setBiasLearningRate(bLayer.getLearningRate()); - } - } - if (bLayer.getLearningRateSchedule() == null) - bLayer.setLearningRateSchedule(learningRateSchedule); if (Double.isNaN(bLayer.getL1())) bLayer.setL1(l1); if (Double.isNaN(bLayer.getL2())) @@ -1419,13 +1106,27 @@ private void copyConfigToLayer(String layerName, Layer layer) { bLayer.setWeightInit(weightInit); if (Double.isNaN(bLayer.getBiasInit())) bLayer.setBiasInit(biasInit); - if (bLayer.getUpdater() == null) - bLayer.setUpdater(updater); - if (bLayer.getIUpdater() == null) { - bLayer.setIUpdater(iUpdater.clone()); + + //Configure weight noise: + if(weightNoise != null && ((BaseLayer) layer).getWeightNoise() == null){ + ((BaseLayer) layer).setWeightNoise(weightNoise.clone()); + } + + //Configure updaters: + if(iUpdater != null && bLayer.getIUpdater() == null){ + bLayer.setIUpdater(iUpdater); } - LayerValidation.updaterValidation(layerName, layer, learningRate, momentum, momentumSchedule, - adamMeanDecay, adamVarDecay, rho, rmsDecay, epsilon); + if(biasUpdater != null && bLayer.getBiasUpdater() == null){ + bLayer.setBiasUpdater(biasUpdater); + } + + if(bLayer.getIUpdater() == null && iUpdater == null && bLayer.initializer().numParams(bLayer) > 0){ + //No updater set anywhere + IUpdater u = new Sgd(); + bLayer.setIUpdater(u); + log.warn("*** No updater configuration is set for layer {} - defaulting to {} ***", layerName, u); + } + if (bLayer.getGradientNormalization() == null) bLayer.setGradientNormalization(gradientNormalization); if (Double.isNaN(bLayer.getGradientNormalizationThreshold())) diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java 
b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java index bdd43659ae0b..83f74bf2b2f0 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java @@ -8,8 +8,6 @@ import org.nd4j.linalg.indexing.conditions.Conditions; import java.util.Collections; -import java.util.HashSet; -import java.util.List; import java.util.Set; /** diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java index 8853a49e5fd0..db0753e146ed 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java @@ -10,9 +10,7 @@ import org.nd4j.linalg.indexing.BooleanIndexing; import org.nd4j.linalg.indexing.conditions.Conditions; -import java.util.Arrays; import java.util.Collections; -import java.util.HashSet; import java.util.Set; /** diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java index 43cde563557c..170b13c6eefd 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java @@ -6,7 +6,6 @@ import org.nd4j.linalg.factory.Broadcast; import java.util.Collections; -import java.util.HashSet; import java.util.Set; /** diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/Distributions.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/Distributions.java index 88cdb172f182..f4863c8bb00f 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/Distributions.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/Distributions.java @@ -46,6 +46,18 @@ public static org.nd4j.linalg.api.rng.distribution.Distribution createDistributi BinomialDistribution bd = (BinomialDistribution) dist; return Nd4j.getDistributions().createBinomial(bd.getNumberOfTrials(), bd.getProbabilityOfSuccess()); } + if (dist instanceof LogNormalDistribution) { + LogNormalDistribution lnd = (LogNormalDistribution) dist; + return Nd4j.getDistributions().createLogNormal(lnd.getMean(), lnd.getStd()); + } + if (dist instanceof TruncatedNormalDistribution) { + TruncatedNormalDistribution tnd = (TruncatedNormalDistribution) dist; + return Nd4j.getDistributions().createTruncatedNormal(tnd.getMean(), tnd.getStd()); + } + if (dist instanceof OrthogonalDistribution) { + OrthogonalDistribution od = (OrthogonalDistribution) dist; + return Nd4j.getDistributions().createOrthogonal(od.getGain()); + } throw new RuntimeException("unknown distribution type: " + dist.getClass()); } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/LogNormalDistribution.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/LogNormalDistribution.java new file mode 100644 index 000000000000..537ef65dbded --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/LogNormalDistribution.java @@ -0,0 +1,92 @@ +/*- + * + * * 
Copyright 2015 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package org.deeplearning4j.nn.conf.distribution; + +import org.nd4j.shade.jackson.annotation.JsonCreator; +import org.nd4j.shade.jackson.annotation.JsonProperty; + +/** + * A log-normal distribution. + * + */ +public class LogNormalDistribution extends Distribution { + + private double mean, std; + + /** + * Create a log-normal distribution + * with the given mean and std + * + * @param mean the mean + * @param std the standard deviation + */ + @JsonCreator + public LogNormalDistribution(@JsonProperty("mean") double mean, @JsonProperty("std") double std) { + this.mean = mean; + this.std = std; + } + + public double getMean() { + return mean; + } + + public void setMean(double mean) { + this.mean = mean; + } + + public double getStd() { + return std; + } + + public void setStd(double std) { + this.std = std; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + long temp; + temp = Double.doubleToLongBits(mean); + result = prime * result + (int) (temp ^ (temp >>> 32)); + temp = Double.doubleToLongBits(std); + result = prime * result + (int) (temp ^ (temp >>> 32)); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + LogNormalDistribution other = (LogNormalDistribution) obj; + if (Double.doubleToLongBits(mean) != Double.doubleToLongBits(other.mean)) + return false; + if (Double.doubleToLongBits(std) != Double.doubleToLongBits(other.std)) + return false; + return true; + } + + public String toString() { + return "LogNormalDistribution{" + "mean=" + mean + ", std=" + std + '}'; + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java new file mode 100644 index 000000000000..a9487c000f70 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java @@ -0,0 +1,78 @@ +/*- + * + * * Copyright 2015 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. 
+ * + */ + +package org.deeplearning4j.nn.conf.distribution; + +import org.nd4j.shade.jackson.annotation.JsonCreator; +import org.nd4j.shade.jackson.annotation.JsonProperty; + +/** + * Orthogonal distribution. + * + */ +public class OrthogonalDistribution extends Distribution { + + private double gain; + + /** + * Create a log-normal distribution + * with the given mean and std + * + * @param gain the gain + */ + @JsonCreator + public OrthogonalDistribution(@JsonProperty("gain") double gain) { + this.gain = gain; + } + + public double getGain() { + return gain; + } + + public void setGain(double gain) { + this.gain = gain; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + long temp; + temp = Double.doubleToLongBits(gain); + result = prime * result + (int) (temp ^ (temp >>> 32)); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + OrthogonalDistribution other = (OrthogonalDistribution) obj; + if (Double.doubleToLongBits(gain) != Double.doubleToLongBits(other.gain)) + return false; + return true; + } + + public String toString() { + return "OrthogonalDistribution{gain=" + gain + "}"; + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/TruncatedNormalDistribution.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/TruncatedNormalDistribution.java new file mode 100644 index 000000000000..0e3ea1a5d918 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/TruncatedNormalDistribution.java @@ -0,0 +1,92 @@ +/*- + * + * * Copyright 2015 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package org.deeplearning4j.nn.conf.distribution; + +import org.nd4j.shade.jackson.annotation.JsonCreator; +import org.nd4j.shade.jackson.annotation.JsonProperty; + +/** + * A truncated normal distribution. 
+ * + */ +public class TruncatedNormalDistribution extends Distribution { + + private double mean, std; + + /** + * Create a truncated normal distribution + * with the given mean and std + * + * @param mean the mean + * @param std the standard deviation + */ + @JsonCreator + public TruncatedNormalDistribution(@JsonProperty("mean") double mean, @JsonProperty("std") double std) { + this.mean = mean; + this.std = std; + } + + public double getMean() { + return mean; + } + + public void setMean(double mean) { + this.mean = mean; + } + + public double getStd() { + return std; + } + + public void setStd(double std) { + this.std = std; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + long temp; + temp = Double.doubleToLongBits(mean); + result = prime * result + (int) (temp ^ (temp >>> 32)); + temp = Double.doubleToLongBits(std); + result = prime * result + (int) (temp ^ (temp >>> 32)); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + TruncatedNormalDistribution other = (TruncatedNormalDistribution) obj; + if (Double.doubleToLongBits(mean) != Double.doubleToLongBits(other.mean)) + return false; + if (Double.doubleToLongBits(std) != Double.doubleToLongBits(other.std)) + return false; + return true; + } + + public String toString() { + return "TruncatedNormalDistribution{" + "mean=" + mean + ", std=" + std + '}'; + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/AlphaDropout.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/AlphaDropout.java new file mode 100644 index 000000000000..08b2b1440933 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/AlphaDropout.java @@ -0,0 +1,123 @@ +package org.deeplearning4j.nn.conf.dropout; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NonNull; +import lombok.ToString; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.random.impl.AlphaDropOut; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.schedule.ISchedule; +import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; +import org.nd4j.shade.jackson.annotation.JsonProperty; + +/** + * AlphaDropout is a dropout technique proposed by Klaumbauer et al. 2017 - Self-Normalizing Neural Networks + * https://arxiv.org/abs/1706.02515
+ *
+ * This dropout technique was designed specifically for self-normalizing neural networks - i.e., networks using + * {@link org.nd4j.linalg.activations.impl.ActivationSELU} / {@link org.nd4j.linalg.activations.Activation#SELU} + * activation function, combined with the N(0,stdev=1/sqrt(fanIn)) "SNN" weight initialization, + * {@link org.deeplearning4j.nn.weights.WeightInit#NORMAL}
+ *
+ * In conjunction with the aforementioned activation function and weight initialization, AlphaDropout attempts to keep + * both the mean and variance of the post-dropout activations the same (in expectation) as before alpha + * dropout was applied.
+ * Specifically, AlphaDropout implements a * (x * d + alphaPrime * (1-d)) + b, where d ~ Bernoulli(p), i.e., d \in {0,1}, + * x is the input activations, and a, b, alphaPrime are constants determined from the SELU alpha/lambda parameters. + * Users should use the default alpha/lambda values in virtually all cases.
+ *
+ * Dropout schedules (i.e., varying probability p as a function of iteration/epoch) are also supported.
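A minimal configuration sketch of the self-normalizing setup this description assumes (SELU activation, NORMAL weight initialization, alpha dropout); the 0.9 retain probability is an arbitrary illustrative value:

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.dropout.AlphaDropout;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;

public class AlphaDropoutSketch {
    public static void main(String[] args) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .activation(Activation.SELU)      // SELU non-linearity, as assumed by alpha dropout
                .weightInit(WeightInit.NORMAL)    // N(0, 1/sqrt(fanIn)) "SNN" initialization
                .dropOut(new AlphaDropout(0.9))   // retain probability 0.9, default SELU alpha/lambda constants
                .layer(new DenseLayer.Builder().nIn(20).nOut(20).build())
                .build();
    }
}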
+ * + * @author Alex Black + */ +@Data +@EqualsAndHashCode(exclude = {"lastPValue","alphaPrime","a","b"}) +@ToString(exclude = {"lastPValue","alphaPrime","a","b"}) +@JsonIgnoreProperties({"lastPValue", "alphaPrime", "a", "b"}) +public class AlphaDropout implements IDropout { + + public static final double DEFAULT_ALPHA = 1.6732632423543772; + public static final double DEFAULT_LAMBDA = 1.0507009873554804; + + + private final double p; + private final ISchedule pSchedule; + private final double alpha; + private final double lambda; + + private double lastPValue; + private double alphaPrime; + private double a; + private double b; + + /** + * @param activationRetainProbability Probability of retaining an activation. See {@link AlphaDropout} javadoc + */ + public AlphaDropout(double activationRetainProbability){ + this(activationRetainProbability, null, DEFAULT_ALPHA, DEFAULT_LAMBDA); + } + + /** + * @param activationRetainProbabilitySchedule Schedule for the probability of retaining an activation. See + * {@link AlphaDropout} javadoc + */ + public AlphaDropout(@NonNull ISchedule activationRetainProbabilitySchedule){ + this(Double.NaN, activationRetainProbabilitySchedule, DEFAULT_ALPHA, DEFAULT_LAMBDA); + } + + protected AlphaDropout(@JsonProperty("p")double activationRetainProbability, + @JsonProperty("pSchedule") ISchedule activationRetainProbabilitySchedule, + @JsonProperty("alpha") double alpha, @JsonProperty("lambda") double lambda ){ + this.p = activationRetainProbability; + this.pSchedule = activationRetainProbabilitySchedule; + this.alpha = alpha; + this.lambda = lambda; + + this.alphaPrime = -lambda * alpha; + if(activationRetainProbabilitySchedule == null){ + this.lastPValue = p; + this.a = a(p); + this.b = b(p); + } + } + + @Override + public INDArray applyDropout(INDArray inputActivations, int iteration, int epoch, boolean inPlace) { + //https://arxiv.org/pdf/1706.02515.pdf pg6 + // "...we propose “alpha dropout”, that randomly sets inputs to α'" + // "The affine transformation a(xd + α'(1−d))+b allows to determine parameters a and b such that mean and + // variance are kept to their values" + + double pValue; + if(pSchedule != null){ + pValue = pSchedule.valueAt(iteration, epoch); + } else { + pValue = p; + } + + if(pValue != lastPValue){ + a = a(pValue); + b = b(pValue); + } + lastPValue = pValue; + + INDArray result = inPlace ? inputActivations : inputActivations.dup(inputActivations.ordering()); + Nd4j.getExecutioner().exec(new AlphaDropOut(result, p, a, alphaPrime, b)); + + return result; + } + + @Override + public AlphaDropout clone() { + return new AlphaDropout(p, pSchedule == null ? 
null : pSchedule.clone(), alpha, lambda); + } + + public double a(double p){ + return 1.0 / Math.sqrt(p + alphaPrime*alphaPrime * p * (1-p)); + } + + public double b(double p){ + return -a(p) * (1-p)*alphaPrime; + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java new file mode 100644 index 000000000000..e09ebec9ca29 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java @@ -0,0 +1,86 @@ +package org.deeplearning4j.nn.conf.dropout; + +import lombok.Data; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.random.impl.DropOutInverted; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.schedule.ISchedule; +import org.nd4j.shade.jackson.annotation.JsonProperty; + +/** + * Implements standard (inverted) dropout.
+ *
+ * Regarding the dropout probability: this is the probability of retaining each input activation value for a layer. + * Thus, each input activation x is independently set to:
+ * x <- 0, with probability 1-p
+ * x <- x/p with probability p
+ * Note that this "inverted" dropout scheme maintains the expected value of activations - i.e., E(x) is the same before + * and after dropout.
+ * Dropout schedules (i.e., varying probability p as a function of iteration/epoch) are also supported.
+ *
+ * Other libraries (notably, Keras) use p == probability(dropping an activation)
+ * In DL4J, {@code new Dropout(x)} will keep an input activation with probability x, and set it to 0 with probability 1-x.
+ * Thus, a dropout value of 1.0 is functionally equivalent to no dropout: i.e., 100% probability of retaining + * each input activation.
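To make the retain-versus-drop convention concrete, a short sketch (values are illustrative) relating the DL4J parameterization to the Keras-style drop rate mentioned above:

import org.deeplearning4j.nn.conf.dropout.Dropout;

public class DropoutConventionSketch {
    public static void main(String[] args) {
        // DL4J: the constructor argument is the probability of RETAINING each activation
        Dropout keepEightyPercent = new Dropout(0.8);   // keep ~80% of activations, scale kept values by 1/0.8

        // Keras-style rate is the probability of DROPPING; rate 0.2 corresponds to new Dropout(1.0 - 0.2)
        double kerasRate = 0.2;
        Dropout fromKerasRate = new Dropout(1.0 - kerasRate);

        // new Dropout(1.0) retains everything, i.e. functionally no dropout
        Dropout noOp = new Dropout(1.0);
    }
}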
+ *

+ * Note 1: As per all IDropout instances, dropout is applied at training time only - and is automatically not applied at + * test time (for evaluation, etc)
+ * Note 2: Care should be taken when setting lower (probability of retaining) values: too much information may be + * lost with aggressive (very low) dropout values.
+ * Note 3: Frequently, dropout is not applied to (or has a higher retain probability for) the input (first) + * layer. Dropout is also often not applied to output layers.
+ * Note 4: Implementation detail (most users can ignore): DL4J uses inverted dropout, as described here: + * http://cs231n.github.io/neural-networks-2/ + *

+ *
+ * See: Srivastava et al. 2014: Dropout: A Simple Way to Prevent Neural Networks from Overfitting + * http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf + * + * @author Alex Black + */ +@Data +public class Dropout implements IDropout { + + private double p; + private ISchedule pSchedule; + + /** + * @param activationRetainProbability Probability of retaining an activation - see {@link Dropout} javadoc + */ + public Dropout(double activationRetainProbability) { + this(activationRetainProbability, null); + } + + /** + * @param activationRetainProbabilitySchedule Schedule for probability of retaining an activation - see {@link Dropout} javadoc + */ + public Dropout(ISchedule activationRetainProbabilitySchedule){ + this(Double.NaN, activationRetainProbabilitySchedule); + } + + protected Dropout(@JsonProperty("p") double activationRetainProbability, @JsonProperty("pSchedule") ISchedule activationRetainProbabilitySchedule) { + this.p = activationRetainProbability; + this.pSchedule = activationRetainProbabilitySchedule; + } + + + @Override + public INDArray applyDropout(INDArray inputActivations, int iteration, int epoch, boolean inPlace) { + double currP; + if(pSchedule != null){ + currP = pSchedule.valueAt(iteration, epoch); + } else { + currP = p; + } + + INDArray result = inPlace ? inputActivations : inputActivations.dup(inputActivations.ordering()); + Nd4j.getExecutioner().exec(new DropOutInverted(result, currP)); + + return result; + } + + @Override + public Dropout clone() { + return new Dropout(p, pSchedule == null ? null : pSchedule.clone()); + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianDropout.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianDropout.java new file mode 100644 index 000000000000..d58fee29a099 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianDropout.java @@ -0,0 +1,79 @@ +package org.deeplearning4j.nn.conf.dropout; + +import lombok.Data; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.impl.transforms.arithmetic.MulOp; +import org.nd4j.linalg.api.ops.random.impl.GaussianDistribution; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.schedule.ISchedule; +import org.nd4j.shade.jackson.annotation.JsonProperty; + +/** + * Gaussian dropout. This is a multiplicative Gaussian noise (mean 1) on the input activations.
+ *
+ * Each input activation x is independently set to:
+ * x <- x * y, where y ~ N(1, stdev = sqrt(rate/(1-rate)))
+ * Dropout schedules (i.e., varying probability p as a function of iteration/epoch) are also supported.
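A brief sketch relating the rate parameter to the noise standard deviation used by the implementation below (stdev = sqrt(rate/(1-rate)); the 0.05 rate is illustrative):

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.dropout.GaussianDropout;
import org.deeplearning4j.nn.conf.layers.DenseLayer;

public class GaussianDropoutSketch {
    public static void main(String[] args) {
        double rate = 0.05;
        double stdev = Math.sqrt(rate / (1.0 - rate));  // ~0.229: each activation is multiplied by y ~ N(1, stdev)
        System.out.println("Noise stdev for rate " + rate + ": " + stdev);

        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .dropOut(new GaussianDropout(rate))
                .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
                .build();
    }
}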
+ *
+ * Note 1: As per all IDropout instances, GaussianDropout is applied at training time only - and is automatically not + * applied at test time (for evaluation, etc)
+ * Note 2: Frequently, dropout is not applied to (or has a higher retain probability for) the input (first) + * layer. Dropout is also often not applied to output layers.
+ *
+ * See: "Multiplicative Gaussian Noise" in Srivastava et al. 2014: Dropout: A Simple Way to Prevent Neural Networks from + * Overfitting http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf + * + * @author Alex Black + */ +@Data +public class GaussianDropout implements IDropout { + + private final double rate; + private final ISchedule rateSchedule; + + /** + * @param rate Rate parameter, see {@link GaussianDropout} + */ + public GaussianDropout(double rate){ + this(rate, null); + } + + /** + * @param rateSchedule Schedule for rate parameter, see {@link GaussianDropout} + */ + public GaussianDropout(ISchedule rateSchedule){ + this(Double.NaN, rateSchedule); + } + + protected GaussianDropout(@JsonProperty("rate") double rate, @JsonProperty("rateSchedule") ISchedule rateSchedule){ + this.rate = rate; + this.rateSchedule = rateSchedule; + } + + @Override + public INDArray applyDropout(INDArray inputActivations, int iteration, int epoch, boolean inPlace) { + double r; + if(rateSchedule != null){ + r = rateSchedule.valueAt(iteration, epoch); + } else { + r = rate; + } + + double stdev = Math.sqrt(r / (1.0 - r)); + + INDArray noise = Nd4j.createUninitialized(inputActivations.shape(), inputActivations.ordering()); + Nd4j.getExecutioner().exec(new GaussianDistribution(noise, 1.0, stdev)); + + if(inPlace){ + return inputActivations.muli(noise); + } else { + INDArray result = Nd4j.createUninitialized(inputActivations.shape(), inputActivations.ordering()); + return Nd4j.getExecutioner().execAndReturn(new MulOp(inputActivations, noise, result)); + } + } + + @Override + public GaussianDropout clone() { + return new GaussianDropout(rate, rateSchedule == null ? null : rateSchedule.clone()); + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianNoise.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianNoise.java new file mode 100644 index 000000000000..c42efd6030ec --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianNoise.java @@ -0,0 +1,64 @@ +package org.deeplearning4j.nn.conf.dropout; + +import lombok.Data; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.random.impl.GaussianDistribution; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.schedule.ISchedule; +import org.nd4j.shade.jackson.annotation.JsonProperty; + +/** + * Applies additive, mean-zero Gaussian noise to the input - i.e., x = x + N(0,stddev).
+ * Note that this differs from {@link GaussianDropout}, which applies multiplicative mean-1 N(1,s) noise.
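For contrast with the multiplicative GaussianDropout above, a short sketch (the 0.1 standard deviation is illustrative) of configuring additive noise through the same dropOut(IDropout) mechanism:

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.dropout.GaussianNoise;
import org.deeplearning4j.nn.conf.layers.DenseLayer;

public class GaussianNoiseSketch {
    public static void main(String[] args) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .dropOut(new GaussianNoise(0.1))  // at training time each activation becomes x + n, n ~ N(0, 0.1)
                .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
                .build();
    }
}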
+ * Note also that schedules for the standard deviation value can also be used. + * + * @author Alex Black + */ +@Data +public class GaussianNoise implements IDropout { + + private double stddev; + private ISchedule stddevSchedule; + + /** + * @param stddev Standard deviation for the mean 0 Gaussian noise + */ + public GaussianNoise(double stddev){ + this(stddev, null); + } + + /** + * @param stddevSchedule Schedule for standard deviation for the mean 0 Gaussian noise + */ + public GaussianNoise(ISchedule stddevSchedule){ + this(Double.NaN, stddevSchedule); + } + + protected GaussianNoise(@JsonProperty("stddev") double stddev, @JsonProperty("stddevSchedule") ISchedule stddevSchedule){ + this.stddev = stddev; + this.stddevSchedule = stddevSchedule; + } + + @Override + public INDArray applyDropout(INDArray inputActivations, int iteration, int epoch, boolean inPlace) { + double currS; + if(stddevSchedule != null){ + currS = stddevSchedule.valueAt(iteration, epoch); + } else { + currS = stddev; + } + + INDArray result = inPlace ? inputActivations : inputActivations.dup(inputActivations.ordering()); + INDArray noise = Nd4j.createUninitialized(inputActivations.shape(), inputActivations.ordering()); + Nd4j.getExecutioner().exec(new GaussianDistribution(noise, 0, currS)); + + result.addi(noise); + + return result; + } + + @Override + public IDropout clone() { + return new GaussianNoise(stddev, stddevSchedule); + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/IDropout.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/IDropout.java new file mode 100644 index 000000000000..a344d79cae60 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/IDropout.java @@ -0,0 +1,29 @@ +package org.deeplearning4j.nn.conf.dropout; + +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.shade.jackson.annotation.JsonTypeInfo; + +import java.io.Serializable; + +/** + * IDropout instances operate on an activations array, modifying or dropping values at training time only. + * IDropout instances are not applied at test time. + * + * @author Alex Black + */ +@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") +public interface IDropout extends Serializable, Cloneable { + + /** + * + * @param inputActivations Input activations array + * @param iteration Current iteration number + * @param epoch Current epoch number + * @param inPlace If true: modify the input activations in-place. 
False: Copy the input activations and + * apply dropout on the copy instead + * @return + */ + INDArray applyDropout(INDArray inputActivations, int iteration, int epoch, boolean inPlace); + + IDropout clone(); +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java index 003f89ea18fb..d433bb8698a8 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java @@ -69,24 +69,6 @@ public double getL2ByParam(String paramName) { } } - @Override - public double getLearningRateByParam(String paramName) { - switch (paramName) { - case LSTMParamInitializer.INPUT_WEIGHT_KEY: - case LSTMParamInitializer.RECURRENT_WEIGHT_KEY: - return learningRate; - case LSTMParamInitializer.BIAS_KEY: - if (!Double.isNaN(biasLearningRate)) { - //Bias learning rate has been explicitly set - return biasLearningRate; - } else { - return learningRate; - } - default: - throw new IllegalArgumentException("Unknown parameter name: \"" + paramName + "\""); - } - } - @AllArgsConstructor @NoArgsConstructor public static abstract class Builder> extends BaseRecurrentLayer.Builder { diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java index c310d66b98c7..1e560d901455 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java @@ -100,12 +100,6 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { .build(); } - @Override - public double getLearningRateByParam(String paramName) { - //Not applicable - return 0; - } - @Override public void setNIn(InputType inputType, boolean override) { //No op diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java index 043fa77040b7..ffd4f5afed80 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java @@ -83,7 +83,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { int updaterStateSize = (int) getIUpdater().stateSize(numParams); int trainSizePerEx = 0; - if (getDropOut() > 0) { + if (getIDropout() != null) { if (false) { //TODO drop connect //Dup the weights... note that this does NOT depend on the minibatch size... 
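The applyDropout contract described above can also be exercised directly on an activations array; a minimal sketch (shapes and values are illustrative) using the standard Dropout implementation:

import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.dropout.IDropout;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class ApplyDropoutSketch {
    public static void main(String[] args) {
        IDropout dropout = new Dropout(0.5);
        INDArray activations = Nd4j.ones(4, 5);

        // inPlace = false: the input array is copied and dropout is applied to the copy
        INDArray out = dropout.applyDropout(activations, 0 /*iteration*/, 0 /*epoch*/, false);

        // Roughly half of the values are now 0.0; the rest are scaled to 1/0.5 = 2.0 (inverted dropout)
        System.out.println(out);
    }
}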
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java index 018eea28dcb7..54ed891bb0eb 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java @@ -25,13 +25,13 @@ import org.deeplearning4j.nn.conf.LearningRatePolicy; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.Distribution; +import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; import org.deeplearning4j.nn.weights.WeightInit; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.learning.config.IUpdater; import java.io.Serializable; -import java.util.HashMap; import java.util.Map; /** @@ -45,34 +45,13 @@ public abstract class BaseLayer extends Layer implements Serializable, Cloneable protected WeightInit weightInit; protected double biasInit; protected Distribution dist; - protected double learningRate; - protected double biasLearningRate; - //learning rate after n iterations - protected Map learningRateSchedule; - @Deprecated - protected double momentum; - //momentum after n iterations - @Deprecated - protected Map momentumSchedule; protected double l1; protected double l2; protected double l1Bias; protected double l2Bias; - @Deprecated - protected Updater updater; protected IUpdater iUpdater; - //adadelta - weight for how much to consider previous history - @Deprecated - protected double rho; - //Epsilon value for adagrad and adadelta - @Deprecated - protected double epsilon; - @Deprecated - protected double rmsDecay; - @Deprecated - protected double adamMeanDecay; - @Deprecated - protected double adamVarDecay; + protected IUpdater biasUpdater; + protected IWeightNoise weightNoise; protected GradientNormalization gradientNormalization = GradientNormalization.None; //Clipping, rescale based on l2 norm, etc protected double gradientNormalizationThreshold = 1.0; //Threshold for l2 and element-wise gradient clipping @@ -84,24 +63,15 @@ public BaseLayer(Builder builder) { this.weightInit = builder.weightInit; this.biasInit = builder.biasInit; this.dist = builder.dist; - this.learningRate = builder.learningRate; - this.biasLearningRate = builder.biasLearningRate; - this.learningRateSchedule = builder.learningRateSchedule; - this.momentum = builder.momentum; - this.momentumSchedule = builder.momentumAfter; this.l1 = builder.l1; this.l2 = builder.l2; this.l1Bias = builder.l1Bias; this.l2Bias = builder.l2Bias; - this.updater = builder.updater; this.iUpdater = builder.iupdater; - this.rho = builder.rho; - this.epsilon = builder.epsilon; - this.rmsDecay = builder.rmsDecay; - this.adamMeanDecay = builder.adamMeanDecay; - this.adamVarDecay = builder.adamVarDecay; + this.biasUpdater = builder.biasUpdater; this.gradientNormalization = builder.gradientNormalization; this.gradientNormalizationThreshold = builder.gradientNormalizationThreshold; + this.weightNoise = builder.weightNoise; } /** @@ -111,25 +81,16 @@ public BaseLayer(Builder builder) { */ public void resetLayerDefaultConfig() { //clear the learning related params for all layers in the origConf and set to defaults - this.setUpdater(null); this.setIUpdater(null); - this.setMomentum(Double.NaN); this.setWeightInit(null); this.setBiasInit(Double.NaN); this.setDist(null); - this.setLearningRate(Double.NaN); - 
this.setBiasLearningRate(Double.NaN); - this.setLearningRateSchedule(null); - this.setMomentumSchedule(null); this.setL1(Double.NaN); this.setL2(Double.NaN); - this.setRho(Double.NaN); - this.setEpsilon(Double.NaN); - this.setRmsDecay(Double.NaN); - this.setAdamMeanDecay(Double.NaN); - this.setAdamVarDecay(Double.NaN); this.setGradientNormalization(GradientNormalization.None); this.setGradientNormalizationThreshold(1.0); + this.iUpdater = null; + this.biasUpdater = null; } @Override @@ -137,27 +98,9 @@ public BaseLayer clone() { BaseLayer clone = (BaseLayer) super.clone(); if (clone.dist != null) clone.dist = clone.dist.clone(); - if (clone.learningRateSchedule != null) - clone.learningRateSchedule = new HashMap<>(clone.learningRateSchedule); - if (clone.momentumSchedule != null) - clone.momentumSchedule = new HashMap<>(clone.momentumSchedule); return clone; } - /** - * Get the updater for the given parameter. Typically the same updater will be used for all updaters, but this - * is not necessarily the case - * - * @param paramName Parameter name - * @return Updater for the parameter - * @deprecated Use {@link #getIUpdaterByParam(String)} - */ - @Deprecated - @Override - public Updater getUpdaterByParam(String paramName) { - return updater; - } - /** * Get the updater for the given parameter. Typically the same updater will be used for all updaters, but this * is not necessarily the case @@ -166,7 +109,10 @@ public Updater getUpdaterByParam(String paramName) { * @return IUpdater for the parameter */ @Override - public IUpdater getIUpdaterByParam(String paramName) { + public IUpdater getUpdaterByParam(String paramName) { + if(biasUpdater != null && initializer().isBiasParam(paramName)){ + return biasUpdater; + } return iUpdater; } @@ -176,46 +122,15 @@ public abstract static class Builder> extends Layer.Builder protected WeightInit weightInit = null; protected double biasInit = Double.NaN; protected Distribution dist = null; - protected double learningRate = Double.NaN; - protected double biasLearningRate = Double.NaN; - protected Map learningRateSchedule = null; - @Deprecated - protected double momentum = Double.NaN; - @Deprecated - protected Map momentumAfter = null; protected double l1 = Double.NaN; protected double l2 = Double.NaN; protected double l1Bias = Double.NaN; protected double l2Bias = Double.NaN; - @Deprecated - protected Updater updater = null; protected IUpdater iupdater = null; - @Deprecated - protected double rho = Double.NaN; - @Deprecated - protected double epsilon = Double.NaN; - @Deprecated - protected double rmsDecay = Double.NaN; - @Deprecated - protected double adamMeanDecay = Double.NaN; - @Deprecated - protected double adamVarDecay = Double.NaN; + protected IUpdater biasUpdater = null; protected GradientNormalization gradientNormalization = null; protected double gradientNormalizationThreshold = Double.NaN; - protected LearningRatePolicy learningRatePolicy = null; - - - /** - * Layer activation function. - * Typical values include:
- * "relu" (rectified linear), "tanh", "sigmoid", "softmax", - * "hardtanh", "leakyrelu", "maxout", "softsign", "softplus" - * @deprecated Use {@link #activation(Activation)} or {@link @activation(IActivation)} - */ - @Deprecated - public T activation(String activationFunction) { - return activation(Activation.fromString(activationFunction)); - } + protected IWeightNoise weightNoise; /** * Set the activation function for the layer. This overload can be used for custom {@link IActivation} instances @@ -265,30 +180,6 @@ public T dist(Distribution dist) { return (T) this; } - /** - * Learning rate. Defaults to 1e-1 - */ - public T learningRate(double learningRate) { - this.learningRate = learningRate; - return (T) this; - } - - /** - * Bias learning rate. Set this to apply a different learning rate to the bias - */ - public T biasLearningRate(double biasLearningRate) { - this.biasLearningRate = biasLearningRate; - return (T) this; - } - - /** - * Learning rate schedule. Map of the iteration to the learning rate to apply at that iteration. - */ - public T learningRateSchedule(Map learningRateSchedule) { - this.learningRateSchedule = learningRateSchedule; - return (T) this; - } - /** * L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 regularization * coefficient for the bias. @@ -323,32 +214,13 @@ public T l2Bias(double l2Bias) { return (T) this; } - /** - * Momentum rate. - * @deprecated Use {@code .updater(new Nesterov(momentum))} instead - */ - @Deprecated - public T momentum(double momentum) { - this.momentum = momentum; - return (T) this; - } - - /** - * Momentum schedule. Map of the iteration to the momentum rate to apply at that iteration. - * @deprecated Use {@code .updater(Nesterov.builder().momentumSchedule(schedule).build())} instead - */ - @Deprecated - public T momentumAfter(Map momentumAfter) { - this.momentumAfter = momentumAfter; - return (T) this; - } - /** * Gradient updater. For example, SGD for standard stochastic gradient descent, NESTEROV for Nesterov momentum, * RSMPROP for RMSProp, etc. * * @see Updater */ + @Deprecated public T updater(Updater updater) { return updater(updater.getIUpdaterWithDefaultConfig()); } @@ -365,56 +237,13 @@ public T updater(IUpdater updater) { } /** - * Ada delta coefficient, rho. Only applies if using .updater(Updater.ADADELTA) - * - * @param rho - * @deprecated use {@code .updater(new AdaDelta(rho,epsilon))} intead - */ - @Deprecated - public T rho(double rho) { - this.rho = rho; - return (T) this; - } - - /** - * Decay rate for RMSProp. Only applies if using .updater(Updater.RMSPROP) - * @deprecated use {@code .updater(new RmsProp(rmsDecay))} instead - */ - @Deprecated - public T rmsDecay(double rmsDecay) { - this.rmsDecay = rmsDecay; - return (T) this; - } - - /** - * Epsilon value for updaters: Adam, RMSProp, Adagrad, Adadelta + * Gradient updater configuration, for the biases only. If not set, biases will use the updater as + * set by {@link #updater(IUpdater)} * - * @param epsilon Epsilon value to use - * @deprecated Use use {@code .updater(Adam.builder().epsilon(epsilon).build())} or similar instead - */ - @Deprecated - public T epsilon(double epsilon) { - this.epsilon = epsilon; - return (T) this; - } - - /** - * Mean decay rate for Adam updater. 
Only applies if using .updater(Updater.ADAM) - * @deprecated use {@code .updater(Adam.builder().beta1(adamMeanDecay).build())} intead - */ - @Deprecated - public T adamMeanDecay(double adamMeanDecay) { - this.adamMeanDecay = adamMeanDecay; - return (T) this; - } - - /** - * Variance decay rate for Adam updater. Only applies if using .updater(Updater.ADAM) - * @deprecated use {@code .updater(Adam.builder().beta2(adamVarDecay).build())} intead + * @param biasUpdater Updater to use for bias parameters */ - @Deprecated - public T adamVarDecay(double adamVarDecay) { - this.adamVarDecay = adamVarDecay; + public T biasUpdater(IUpdater biasUpdater){ + this.biasUpdater = biasUpdater; return (T) this; } @@ -441,14 +270,14 @@ public T gradientNormalizationThreshold(double threshold) { } /** - * Learning rate decay policy. Used to adapt learning rate based on policy. + * Set the weight noise (such as {@link org.deeplearning4j.nn.conf.weightnoise.DropConnect} and + * {@link org.deeplearning4j.nn.conf.weightnoise.WeightNoise}) for this layer * - * @param policy Type of policy to use. Defaults to None. - * @see GradientNormalization + * @param weightNoise Weight noise instance to use */ - public T learningRateDecayPolicy(LearningRatePolicy policy) { - this.learningRatePolicy = policy; - return (T) this; + public T weightNoise(IWeightNoise weightNoise){ + this.weightNoise = weightNoise; + return (T)this; } } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java index d9b40feb2925..fa35859c3e1d 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java @@ -1,12 +1,12 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.*; -import org.deeplearning4j.nn.api.ParamInitializer; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.ToString; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; -import org.deeplearning4j.nn.params.DefaultParamInitializer; -import org.deeplearning4j.nn.params.EmptyParamInitializer; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; import org.nd4j.linalg.lossfunctions.impl.LossBinaryXENT; @@ -63,7 +63,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { int trainSizeFixed = 0; int trainSizeVariable = 0; - if (getDropOut() > 0) { + if (getIDropout() != null) { if (false) { //TODO drop connect //Dup the weights... note that this does NOT depend on the minibatch size... 
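A hedged sketch of the per-layer overrides introduced in these builder changes (updater, biasUpdater, weightNoise); the same methods also exist on NeuralNetConfiguration.Builder as network-wide defaults. It assumes DropConnect exposes a weight-retain-probability constructor, and all hyperparameter values are illustrative:

import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.weightnoise.DropConnect;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.learning.config.Sgd;

public class PerLayerOverridesSketch {
    public static void main(String[] args) {
        DenseLayer layer = new DenseLayer.Builder()
                .nIn(100).nOut(50)
                .updater(new Adam(1e-3))            // updater (and learning rate) for the weight parameters
                .biasUpdater(new Sgd(1e-2))         // separate updater/learning rate for the bias parameters only
                .weightNoise(new DropConnect(0.95)) // assumed constructor: retain each weight with probability 0.95 at training time
                .build();
    }
}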
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BasePretrainNetwork.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BasePretrainNetwork.java index cd9d15c43fc8..87850ab4519c 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BasePretrainNetwork.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BasePretrainNetwork.java @@ -71,30 +71,6 @@ public double getL2ByParam(String paramName) { } } - @Override - public double getLearningRateByParam(String paramName) { - switch (paramName) { - case PretrainParamInitializer.WEIGHT_KEY: - return learningRate; - case PretrainParamInitializer.BIAS_KEY: - if (!Double.isNaN(biasLearningRate)) { - //Bias learning rate has been explicitly set - return biasLearningRate; - } else { - return learningRate; - } - case PretrainParamInitializer.VISIBLE_BIAS_KEY: - if (!Double.isNaN(biasLearningRate)) { - //Bias learning rate has been explicitly set - return biasLearningRate; - } else { - return learningRate; - } - default: - throw new IllegalArgumentException("Unknown parameter name: \"" + paramName + "\""); - } - } - @Override public boolean isPretrainParam(String paramName) { return PretrainParamInitializer.VISIBLE_BIAS_KEY.equals(paramName); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java index d8feb7125c83..232867441e1a 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java @@ -1,6 +1,9 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.*; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.ToString; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java new file mode 100644 index 000000000000..1e9c09d0523d --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java @@ -0,0 +1,110 @@ +/*- + * + * * Copyright 2017 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. 
+ * + */ +package org.deeplearning4j.nn.conf.layers; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.ToString; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.conf.memory.MemoryReport; +import org.deeplearning4j.nn.params.EmptyParamInitializer; +import org.deeplearning4j.optimize.api.IterationListener; +import org.nd4j.linalg.api.ndarray.INDArray; + +import java.util.Collection; +import java.util.Map; + +/** + * Upsampling base layer + * + * @author Max Pumperla + */ + +@Data +@NoArgsConstructor +@ToString(callSuper = true) +@EqualsAndHashCode(callSuper = true) +public abstract class BaseUpsamplingLayer extends Layer { + + protected int size; + + protected BaseUpsamplingLayer(UpsamplingBuilder builder) { + super(builder); + this.size = builder.size; + } + + @Override + public BaseUpsamplingLayer clone() { + BaseUpsamplingLayer clone = (BaseUpsamplingLayer) super.clone(); + return clone; + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + + @Override + public void setNIn(InputType inputType, boolean override) { + //No op: upsampling layer doesn't have nIn value + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException("Invalid input for Upsampling layer (layer name=\"" + getLayerName() + + "\"): input is null"); + } + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); + } + + @Override + public double getL1ByParam(String paramName) { + //Not applicable + return 0; + } + + @Override + public double getL2ByParam(String paramName) { + //Not applicable + return 0; + } + + @Override + public boolean isPretrainParam(String paramName) { + throw new UnsupportedOperationException("UpsamplingLayer does not contain parameters"); + } + + + @NoArgsConstructor + protected static abstract class UpsamplingBuilder> + extends Layer.Builder { + protected int size = 1; + + protected UpsamplingBuilder(int size) { + this.size = size; + } + } + +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java index e75a636bdfd9..7f80569761b4 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java @@ -6,7 +6,6 @@ import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; @@ -17,7 +16,10 @@ import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.config.NoOp; -import java.util.*; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; /** * Batch normalization configuration @@ -139,35 +141,7 @@ public double getL2ByParam(String paramName) { } @Override - 
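// --- Illustrative usage (not part of this patch) ---------------------------------------------
// The new abstract BaseUpsamplingLayer above is the shared config base for the Upsampling1D and
// Upsampling2D layers added later in this patch. A rough sketch of wiring an Upsampling2D layer
// into a convolutional configuration; the surrounding network, sizes and wrapper class are
// arbitrary examples rather than code from this PR.
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.Upsampling2D;

public class UpsamplingConfigExample {                                   // hypothetical helper class
    public static MultiLayerConfiguration exampleConf() {
        return new NeuralNetConfiguration.Builder()
                .list()
                .layer(0, new ConvolutionLayer.Builder(3, 3).nIn(1).nOut(8).build())
                .layer(1, new Upsampling2D.Builder(2).build())           // size = 2: doubles height and width
                .setInputType(InputType.convolutional(28, 28, 1))
                .build();
    }
}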
public double getLearningRateByParam(String paramName) { - switch (paramName) { - case BatchNormalizationParamInitializer.BETA: - case BatchNormalizationParamInitializer.GAMMA: - return learningRate; - case BatchNormalizationParamInitializer.GLOBAL_MEAN: - case BatchNormalizationParamInitializer.GLOBAL_VAR: - return 0.0; - default: - throw new IllegalArgumentException("Unknown parameter: \"" + paramName + "\""); - } - } - - @Override - public Updater getUpdaterByParam(String paramName) { - switch (paramName) { - case BatchNormalizationParamInitializer.BETA: - case BatchNormalizationParamInitializer.GAMMA: - return updater; - case BatchNormalizationParamInitializer.GLOBAL_MEAN: - case BatchNormalizationParamInitializer.GLOBAL_VAR: - return Updater.NONE; - default: - throw new IllegalArgumentException("Unknown parameter: \"" + paramName + "\""); - } - } - - @Override - public IUpdater getIUpdaterByParam(String paramName) { + public IUpdater getUpdaterByParam(String paramName) { switch (paramName) { case BatchNormalizationParamInitializer.BETA: case BatchNormalizationParamInitializer.GAMMA: @@ -190,7 +164,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { int updaterStateSize = 0; for (String s : BatchNormalizationParamInitializer.keys()) { - updaterStateSize += getIUpdaterByParam(s).stateSize(nOut); + updaterStateSize += getUpdaterByParam(s).stateSize(nOut); } //During forward pass: working memory size approx. equal to 2x input size (copy ops, etc) diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java index 8b8c2263f8f8..e990dcabb69f 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java @@ -25,13 +25,11 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.CenterLossParamInitializer; import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.util.LayerValidation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.config.NoOp; @@ -91,19 +89,7 @@ public ParamInitializer initializer() { } @Override - @Deprecated - public Updater getUpdaterByParam(String paramName) { - // center loss utilizes alpha directly for this so any updater can be used for other layers - switch (paramName) { - case CenterLossParamInitializer.CENTER_KEY: - return Updater.NONE; - default: - return updater; - } - } - - @Override - public IUpdater getIUpdaterByParam(String paramName) { + public IUpdater getUpdaterByParam(String paramName) { // center loss utilizes alpha directly for this so any updater can be used for other layers switch (paramName) { case CenterLossParamInitializer.CENTER_KEY: @@ -113,25 +99,6 @@ public IUpdater getIUpdaterByParam(String paramName) { } } - @Override - public double getLearningRateByParam(String paramName) { - switch (paramName) { - case CenterLossParamInitializer.WEIGHT_KEY: - return learningRate; - case CenterLossParamInitializer.BIAS_KEY: - 
if (!Double.isNaN(biasLearningRate)) { - //Bias learning rate has been explicitly set - return biasLearningRate; - } else { - return learningRate; - } - case CenterLossParamInitializer.CENTER_KEY: - return 0; - default: - throw new IllegalStateException("Unknown parameter: \"" + paramName + "\""); - } - } - @Override public double getL1ByParam(String paramName) { switch (paramName) { @@ -182,13 +149,13 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { int nParamsCenter = nIn * nOut; int numParams = nParamsW + nParamsB + nParamsCenter; - int updaterStateSize = (int) (getIUpdaterByParam(CenterLossParamInitializer.WEIGHT_KEY).stateSize(nParamsW) - + getIUpdaterByParam(CenterLossParamInitializer.BIAS_KEY).stateSize(nParamsB) - + getIUpdaterByParam(CenterLossParamInitializer.CENTER_KEY).stateSize(nParamsCenter)); + int updaterStateSize = (int) (getUpdaterByParam(CenterLossParamInitializer.WEIGHT_KEY).stateSize(nParamsW) + + getUpdaterByParam(CenterLossParamInitializer.BIAS_KEY).stateSize(nParamsB) + + getUpdaterByParam(CenterLossParamInitializer.CENTER_KEY).stateSize(nParamsCenter)); int trainSizeFixed = 0; int trainSizeVariable = 0; - if (getDropOut() > 0) { + if (getIDropout() != null) { if (false) { //TODO drop connect //Dup the weights... note that this does NOT depend on the minibatch size... diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java index d53647b0e086..f1ba82a4ba17 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java @@ -21,25 +21,6 @@ import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; -import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.*; -import org.deeplearning4j.nn.conf.distribution.Distribution; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; -import org.deeplearning4j.nn.conf.memory.MemoryReport; -import org.deeplearning4j.nn.params.ConvolutionParamInitializer; -import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.util.ConvolutionUtils; -import org.deeplearning4j.util.LayerValidation; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.activations.IActivation; -import org.nd4j.linalg.api.ndarray.INDArray; - -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; /** * 1D convolution layer diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java index bb0b004baafe..66691762b646 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java @@ -4,11 +4,9 @@ import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; -import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.params.ConvolutionParamInitializer; 
import org.deeplearning4j.optimize.api.IterationListener; import org.deeplearning4j.util.ConvolutionUtils; import org.nd4j.linalg.api.ndarray.INDArray; @@ -45,7 +43,7 @@ private Convolution1DLayer(Builder builder) { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams) { - org.deeplearning4j.util.LayerValidation.assertNInNOutSet("Convolution1DLayer", getLayerName(), layerIndex, + LayerValidation.assertNInNOutSet("Convolution1DLayer", getLayerName(), layerIndex, getNIn(), getNOut()); org.deeplearning4j.nn.layers.convolution.Convolution1DLayer ret = diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java index ec89f752399f..7b1d8ac8d40b 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java @@ -21,25 +21,6 @@ import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; -import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.*; -import org.deeplearning4j.nn.conf.distribution.Distribution; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; -import org.deeplearning4j.nn.conf.memory.MemoryReport; -import org.deeplearning4j.nn.params.ConvolutionParamInitializer; -import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.util.ConvolutionUtils; -import org.deeplearning4j.util.LayerValidation; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.activations.IActivation; -import org.nd4j.linalg.api.ndarray.INDArray; - -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; /** * 2D convolution layer diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java index 919be8fb7e03..8a6a50b57c4f 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java @@ -1,6 +1,9 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.*; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.ToString; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.*; @@ -12,7 +15,6 @@ import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.api.IterationListener; import org.deeplearning4j.util.ConvolutionUtils; -import org.deeplearning4j.util.LayerValidation; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.ndarray.INDArray; @@ -202,23 +204,6 @@ public double getL2ByParam(String paramName) { } } - @Override - public double getLearningRateByParam(String paramName) { - switch (paramName) { - case ConvolutionParamInitializer.WEIGHT_KEY: - return learningRate; - case ConvolutionParamInitializer.BIAS_KEY: - if (!Double.isNaN(biasLearningRate)) { - //Bias learning rate has been explicitly set 
- return biasLearningRate; - } else { - return learningRate; - } - default: - throw new IllegalArgumentException("Unknown parameter name: \"" + paramName + "\""); - } - } - @Override public LayerMemoryReport getMemoryReport(InputType inputType) { int paramSize = initializer().numParams(this); @@ -252,7 +237,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { trainWorkingSizePerEx = im2colSizePerEx; } - if (getDropOut() > 0) { + if (getIDropout() != null) { //Dup on the input before dropout, but only for training trainWorkingSizePerEx += inputType.arrayElementsPerExample(); } @@ -377,36 +362,6 @@ public Builder dist(Distribution dist) { return this; } - /** - * Learning rate. Defaults to 1e-1 - * - * @param learningRate - */ - @Override - public Builder learningRate(double learningRate) { - return super.learningRate(learningRate); - } - - /** - * Bias learning rate. Set this to apply a different learning rate to the bias - * - * @param biasLearningRate - */ - @Override - public Builder biasLearningRate(double biasLearningRate) { - return super.biasLearningRate(biasLearningRate); - } - - /** - * Learning rate schedule. Map of the iteration to the learning rate to apply at that iteration. - * - * @param learningRateSchedule - */ - @Override - public Builder learningRateSchedule(Map learningRateSchedule) { - return super.learningRateSchedule(learningRateSchedule); - } - /** * L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 regularization * coefficient for the bias. @@ -449,26 +404,6 @@ public Builder l2Bias(double l2Bias) { return super.l2Bias(l2Bias); } - /** - * Momentum rate. - * - * @param momentum - */ - @Override - public Builder momentum(double momentum) { - return super.momentum(momentum); - } - - /** - * Momentum schedule. Map of the iteration to the momentum rate to apply at that iteration. - * - * @param momentumAfter - */ - @Override - public Builder momentumAfter(Map momentumAfter) { - return super.momentumAfter(momentumAfter); - } - /** * Gradient updater. For example, SGD for standard stochastic gradient descent, NESTEROV for Nesterov momentum, * RSMPROP for RMSProp, etc. @@ -477,61 +412,11 @@ public Builder momentumAfter(Map momentumAfter) { * @see Updater */ @Override + @Deprecated public Builder updater(Updater updater) { return super.updater(updater); } - /** - * Ada delta coefficient, rho. Only applies if using .updater(Updater.ADADELTA) - * - * @param rho - */ - @Override - public Builder rho(double rho) { - return super.rho(rho); - } - - /** - * Decay rate for RMSProp. Only applies if using .updater(Updater.RMSPROP) - * - * @param rmsDecay - */ - @Override - public Builder rmsDecay(double rmsDecay) { - return super.rmsDecay(rmsDecay); - } - - /** - * Epsilon value for updaters: Adagrad and Adadelta. Only used if using Updater.ADAGRAD or Updater.ADADELTA - * - * @param epsilon Epsilon value to use for adagrad and adadelta - */ - @Override - public Builder epsilon(double epsilon) { - return super.epsilon(epsilon); - } - - /** - * Mean decay rate for Adam updater. Only applies if using .updater(Updater.ADAM) - * - * @param adamMeanDecay - */ - @Override - public Builder adamMeanDecay(double adamMeanDecay) { - return super.adamMeanDecay(adamMeanDecay); - } - - /** - * Variance decay rate for Adam updater. 
Only applies if using .updater(Updater.ADAM) - * - * @param adamVarDecay - */ - @Override - public Builder adamVarDecay(double adamVarDecay) { - super.adamVarDecay(adamVarDecay); - return this; - } - /** * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping etc. * @@ -558,18 +443,6 @@ public Builder gradientNormalizationThreshold(double threshold) { return this; } - /** - * Learning rate decay policy. Used to adapt learning rate based on policy. - * - * @param policy Type of policy to use. Defaults to None. - * @see GradientNormalization - */ - @Override - public Builder learningRateDecayPolicy(LearningRatePolicy policy) { - super.learningRateDecayPolicy(policy); - return this; - } - /** * Size of the convolution * rows/columns diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java index 45237b36e191..81ecabc78f7a 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java @@ -18,7 +18,10 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.*; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.ToString; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -27,7 +30,6 @@ import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.util.LayerValidation; import org.nd4j.linalg.api.ndarray.INDArray; import java.util.Collection; @@ -79,7 +81,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { int trainSizeFixed = 0; int trainSizeVariable = 0; - if (getDropOut() > 0) { + if (getIDropout() != null) { if (false) { //TODO drop connect //Dup the weights... note that this does NOT depend on the minibatch size... 
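// --- Illustrative usage (not part of this patch) ---------------------------------------------
// Throughout this patch, checks such as "getDropOut() > 0" become "getIDropout() != null":
// dropout is now carried by an IDropout instance rather than a raw double. A short sketch of the
// equivalent configuration calls; the probabilities and wrapper class are arbitrary, and the
// alternative implementations (GaussianDropout, GaussianNoise) are those named in the
// dropOut(IDropout) javadoc added later in this diff.
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.DropoutLayer;

public class DropoutConfigExample {                                      // hypothetical helper class
    public static void examples() {
        // The convenience overload still takes a retain probability, but now wraps it internally:
        DenseLayer a = new DenseLayer.Builder().nIn(10).nOut(10).dropOut(0.5).build();
        // ...which is equivalent to passing the IDropout instance explicitly:
        DenseLayer b = new DenseLayer.Builder().nIn(10).nOut(10).dropOut(new Dropout(0.5)).build();
        // DropoutLayer's Builder(double) (next file in this diff) now delegates to new Dropout(p) too:
        DropoutLayer c = new DropoutLayer.Builder(0.5).build();
    }
}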
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java index d94479b0576a..1b4d61fabe3a 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java @@ -7,6 +7,7 @@ import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; @@ -80,12 +81,6 @@ public double getL2ByParam(String paramName) { return 0; } - @Override - public double getLearningRateByParam(String paramName) { - //Not applicable - return 0; - } - @Override public boolean isPretrainParam(String paramName) { throw new UnsupportedOperationException("Dropout layer does not contain parameters"); @@ -107,8 +102,9 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { @NoArgsConstructor public static class Builder extends FeedForwardLayer.Builder { - public Builder(double dropOut) { - this.dropOut = dropOut; + + public Builder(double dropout){ + this.dropOut(new Dropout(dropout)); } @Override diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java index 76896a7dcce1..8b5bbf41d5e2 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java @@ -1,11 +1,12 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.*; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.ToString; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.constraint.BaseConstraint; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; @@ -14,9 +15,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import java.util.Collection; -import java.util.List; import java.util.Map; -import java.util.Set; /** * Embedding layer: feed-forward layer that expects single integers per example as input (class numbers, in range 0 to numClass-1) diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java index 657a851bed0d..142257d86178 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java @@ -106,23 +106,6 @@ public double getL2ByParam(String paramName) { } } - @Override - public double getLearningRateByParam(String paramName) { - switch (paramName) { - case DefaultParamInitializer.WEIGHT_KEY: - return learningRate; - case DefaultParamInitializer.BIAS_KEY: - if (!Double.isNaN(biasLearningRate)) { - //Bias learning rate has been 
explicitly set - return biasLearningRate; - } else { - return learningRate; - } - default: - throw new IllegalStateException("Unknown parameter: \"" + paramName + "\""); - } - } - @Override public boolean isPretrainParam(String paramName) { return false; //No pretrain params in standard FF layers diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java index 4927df3f30c9..549d5b413ade 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java @@ -150,12 +150,6 @@ public double getL2ByParam(String paramName) { return 0; } - @Override - public double getLearningRateByParam(String paramName) { - //Not applicable - return 0; - } - @Override public boolean isPretrainParam(String paramName) { throw new UnsupportedOperationException("Global pooling layer does not contain parameters"); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java index 91e9b33c5dbe..d5baa079b436 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java @@ -125,27 +125,6 @@ public double getL2ByParam(String paramName) { } } - @Override - public double getLearningRateByParam(String paramName) { - switch (paramName) { - case GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS: - case GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS: - case GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS: - case GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS: - return learningRate; - case GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS: - case GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS: - if (!Double.isNaN(biasLearningRate)) { - //Bias learning rate has been explicitly set - return biasLearningRate; - } else { - return learningRate; - } - default: - throw new IllegalArgumentException("Unknown parameter name: \"" + paramName + "\""); - } - } - @Override public LayerMemoryReport getMemoryReport(InputType inputType) { return LSTMHelpers.getMemoryReport(this, inputType); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java index 7d79b0448bab..daea0f9ef6e3 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java @@ -28,12 +28,14 @@ import org.deeplearning4j.nn.layers.recurrent.LSTMHelpers; import org.deeplearning4j.nn.params.GravesLSTMParamInitializer; import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.util.LayerValidation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationSigmoid; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Map; /** * LSTM recurrent net, based on Graves: Supervised Sequence Labelling with Recurrent 
Neural Networks diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java index 757b71ede86c..5b58744a8e48 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java @@ -23,18 +23,19 @@ import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.constraint.BaseConstraint; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.layers.recurrent.LSTMHelpers; import org.deeplearning4j.nn.params.LSTMParamInitializer; import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.util.LayerValidation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationSigmoid; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Map; /** * LSTM recurrent net without peephole connections. diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java index fa100dbb792b..6dc5960c11ba 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java @@ -24,8 +24,8 @@ import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; -import org.deeplearning4j.nn.conf.constraint.BaseConstraint; +import org.deeplearning4j.nn.conf.dropout.Dropout; +import org.deeplearning4j.nn.conf.dropout.IDropout; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.misc.FrozenLayer; import org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer; @@ -69,21 +69,25 @@ @JsonSubTypes.Type(value = ZeroPaddingLayer.class, name = "zeroPadding"), @JsonSubTypes.Type(value = ZeroPadding1DLayer.class, name = "zeroPadding1d"), @JsonSubTypes.Type(value = FrozenLayer.class, name = "FrozenLayer"), + @JsonSubTypes.Type(value = Upsampling2D.class, name = "Upsampling2D"), @JsonSubTypes.Type(value = Yolo2OutputLayer.class, name = "Yolo2OutputLayer") }) @Data @NoArgsConstructor public abstract class Layer implements Serializable, Cloneable { protected String layerName; - protected double dropOut; + protected IDropout iDropout; protected List constraints; public Layer(Builder builder) { this.layerName = builder.layerName; - this.dropOut = builder.dropOut; + this.iDropout = builder.iDropout; } + /** + * Initialize the weight constraints. 
Should be called last, in the outer-most constructor + */ protected void initializeConstraints(Builder builder){ //Note: this has to be done AFTER all constructors have finished - otherwise the required // fields may not yet be set yet @@ -111,7 +115,12 @@ protected void initializeConstraints(Builder builder){ allConstraints.add(c2); } } - this.constraints = allConstraints; + if(allConstraints.size() > 0) { + this.constraints = allConstraints; + } else { + this.constraints = null; + } + this.iDropout = builder.iDropout; } /** @@ -121,7 +130,8 @@ protected void initializeConstraints(Builder builder){ */ public void resetLayerDefaultConfig() { //clear the learning related params for all layers in the origConf and set to defaults - this.setDropOut(Double.NaN); + this.iDropout = null; + this.constraints = null; } @Override @@ -137,6 +147,9 @@ public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfigurati Collection iterationListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams); + /** + * @return The parameter initializer for this model + */ public abstract ParamInitializer initializer(); /** @@ -191,16 +204,6 @@ public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfigurati */ public abstract double getL2ByParam(String paramName); - /** - * Get the (initial) learning rate coefficient for the given parameter. - * Different parameters may be configured to have different learning rates, though commonly all parameters will - * have the same learning rate - * - * @param paramName Parameter name - * @return Initial learning rate value for that parameter - */ - public abstract double getLearningRateByParam(String paramName); - /** * Is the specified parameter a layerwise pretraining only parameter?
* For example, visible bias params in an autoencoder (or, decoder params in a variational autoencoder) aren't @@ -212,20 +215,6 @@ public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfigurati */ public abstract boolean isPretrainParam(String paramName); - /** - * Get the updater for the given parameter. Typically the same updater will be used for all updaters, but this - * is not necessarily the case - * - * @param paramName Parameter name - * @return Updater for the parameter - * @deprecated Use {@link #getIUpdaterByParam(String)} - */ - @Deprecated - public Updater getUpdaterByParam(String paramName) { - throw new UnsupportedOperationException( - "Not supported: all layers with parameters should override this method"); - } - /** * Get the updater for the given parameter. Typically the same updater will be used for all updaters, but this * is not necessarily the case @@ -233,7 +222,7 @@ public Updater getUpdaterByParam(String paramName) { * @param paramName Parameter name * @return IUpdater for the parameter */ - public IUpdater getIUpdaterByParam(String paramName) { + public IUpdater getUpdaterByParam(String paramName) { throw new UnsupportedOperationException( "Not supported: all layers with parameters should override this method"); } @@ -249,10 +238,10 @@ public IUpdater getIUpdaterByParam(String paramName) { @SuppressWarnings("unchecked") public abstract static class Builder> { protected String layerName = null; - protected double dropOut = Double.NaN; protected List allParamConstraints; protected List weightConstraints; protected List biasConstraints; + protected IDropout iDropout; /** * Layer name assigns layer string name. @@ -284,10 +273,21 @@ public T name(String layerName) { *

* * @param inputRetainProbability Dropout probability (probability of retaining each input activation value for a layer) + * @see #dropOut(IDropout) */ public T dropOut(double inputRetainProbability) { - this.dropOut = inputRetainProbability; - return (T) this; + return dropOut(new Dropout(inputRetainProbability)); + } + + /** + * Set the dropout for all layers in this network + * + * @param dropout Dropout, such as {@link Dropout}, {@link org.deeplearning4j.nn.conf.dropout.GaussianDropout}, + * {@link org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc + */ + public T dropOut(IDropout dropout){ + this.iDropout = dropout; + return (T)this; } /** diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java index 6f15d72be1d7..f7a5efba5d42 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java @@ -1,16 +1,18 @@ package org.deeplearning4j.nn.conf.layers; import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.exception.DL4JInvalidConfigException; import org.deeplearning4j.nn.api.layers.LayerConstraint; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.dropout.IDropout; import org.deeplearning4j.nn.conf.layers.misc.FrozenLayer; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.util.OneTimeLogger; -import org.nd4j.linalg.learning.config.*; -import java.util.*; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; /** * Created by Alex on 22/02/2017. @@ -19,306 +21,47 @@ public class LayerValidation { /** - * Validate the updater configuration - setting the default updater values, if necessary + * Asserts that the layer nIn and nOut values are set for the layer + * + * @param layerType Type of layer ("DenseLayer", etc) + * @param layerName Name of the layer (may be null if not set) + * @param layerIndex Index of the layer + * @param nIn nIn value + * @param nOut nOut value */ - public static void updaterValidation(String layerName, Layer layer, Double learningRate, Double momentum, - Map momentumSchedule, Double adamMeanDecay, Double adamVarDecay, Double rho, - Double rmsDecay, Double epsilon) { - BaseLayer bLayer; - if (layer instanceof FrozenLayer && ((FrozenLayer) layer).getLayer() instanceof BaseLayer) { - bLayer = (BaseLayer) ((FrozenLayer) layer).getLayer(); - } else if (layer instanceof BaseLayer) { - bLayer = (BaseLayer) layer; - } else { - return; + public static void assertNInNOutSet(String layerType, String layerName, int layerIndex, int nIn, int nOut) { + if (nIn <= 0 || nOut <= 0) { + if (layerName == null) + layerName = "(name not set)"; + throw new DL4JInvalidConfigException(layerType + " (index=" + layerIndex + ", name=" + layerName + ") nIn=" + + nIn + ", nOut=" + nOut + "; nIn and nOut must be > 0"); } - updaterValidation(layerName, bLayer, learningRate == null ? Double.NaN : learningRate, - momentum == null ? Double.NaN : momentum, momentumSchedule, - adamMeanDecay == null ? Double.NaN : adamMeanDecay, - adamVarDecay == null ? Double.NaN : adamVarDecay, rho == null ? Double.NaN : rho, - rmsDecay == null ? Double.NaN : rmsDecay, epsilon == null ? 
Double.NaN : epsilon); } - /** - * Validate the updater configuration - setting the default updater values, if necessary - */ - public static void updaterValidation(String layerName, BaseLayer layer, double learningRate, double momentum, - Map momentumSchedule, double adamMeanDecay, double adamVarDecay, double rho, - double rmsDecay, double epsilon) { - if ((!Double.isNaN(momentum) || !Double.isNaN(layer.getMomentum())) && layer.getUpdater() != Updater.NESTEROVS) - OneTimeLogger.warn(log, "Layer \"" + layerName - + "\" momentum has been set but will not be applied unless the updater is set to NESTEROVS."); - if ((momentumSchedule != null || layer.getMomentumSchedule() != null) - && layer.getUpdater() != Updater.NESTEROVS) - OneTimeLogger.warn(log, "Layer \"" + layerName - + "\" momentum schedule has been set but will not be applied unless the updater is set to NESTEROVS."); - if ((!Double.isNaN(adamVarDecay) || (!Double.isNaN(layer.getAdamVarDecay()))) - && layer.getUpdater() != Updater.ADAM) - OneTimeLogger.warn(log, "Layer \"" + layerName - + "\" adamVarDecay is set but will not be applied unless the updater is set to Adam."); - if ((!Double.isNaN(adamMeanDecay) || !Double.isNaN(layer.getAdamMeanDecay())) - && layer.getUpdater() != Updater.ADAM) - OneTimeLogger.warn(log, "Layer \"" + layerName - + "\" adamMeanDecay is set but will not be applied unless the updater is set to Adam."); - if ((!Double.isNaN(rho) || !Double.isNaN(layer.getRho())) && layer.getUpdater() != Updater.ADADELTA) - OneTimeLogger.warn(log, "Layer \"" + layerName - + "\" rho is set but will not be applied unless the updater is set to ADADELTA."); - if ((!Double.isNaN(rmsDecay) || (!Double.isNaN(layer.getRmsDecay()))) && layer.getUpdater() != Updater.RMSPROP) - OneTimeLogger.warn(log, "Layer \"" + layerName - + "\" rmsdecay is set but will not be applied unless the updater is set to RMSPROP."); - - //Set values from old (deprecated) .epsilon(), .momentum(), etc methods to the built-in updaters - //Note that there are *layer* versions (available via the layer) and *global* versions (via the method args) - //The layer versions take precedence over the global versions. If neither are set, we use whatever is set - // on the IUpdater instance, which may be the default, or may be user-configured - //Note that default values for all other parameters are set by default in the Sgd/Adam/whatever classes - //Hence we don't need to set them here - //Finally: we'll also set the (updater enumeration field to something sane) to avoid updater=SGD, - // iupdater=Adam() type situations. Though the updater field isn't used, we don't want to confuse users - IUpdater u = layer.getIUpdater(); - if (!Double.isNaN(layer.getLearningRate())) { - //Note that for LRs, if user specifies .learningRate(x).updater(Updater.SGD) (for example), we need to set the - // LR in the Sgd object. We can do this using the schedules method, which also works for custom updaters - //Local layer LR set - u.applySchedules(0, layer.getLearningRate()); - } else if (!Double.isNaN(learningRate)) { - //Global LR set - u.applySchedules(0, learningRate); - } - - - if (u instanceof Sgd) { - layer.setUpdater(Updater.SGD); - - } else if (u instanceof Adam) { - Adam a = (Adam) u; - if (!Double.isNaN(layer.getEpsilon())) { - //user has done legacy .epsilon(...) on the layer itself - a.setEpsilon(layer.getEpsilon()); - } else if (!Double.isNaN(epsilon)) { - //user has done legacy .epsilon(...) 
on MultiLayerNetwork or ComputationGraph - a.setEpsilon(epsilon); - } - - if (!Double.isNaN(layer.getAdamMeanDecay())) { - a.setBeta1(layer.getAdamMeanDecay()); - } else if (!Double.isNaN(adamMeanDecay)) { - a.setBeta1(adamMeanDecay); - } - - if (!Double.isNaN(layer.getAdamVarDecay())) { - a.setBeta2(layer.getAdamVarDecay()); - } else if (!Double.isNaN(adamVarDecay)) { - a.setBeta2(adamVarDecay); - } - - layer.setUpdater(Updater.ADAM); - - } else if (u instanceof AdaDelta) { - AdaDelta a = (AdaDelta) u; - - if (!Double.isNaN(layer.getRho())) { - a.setRho(layer.getRho()); - } else if (!Double.isNaN(rho)) { - a.setRho(rho); - } - - if (!Double.isNaN(layer.getEpsilon())) { - a.setEpsilon(layer.getEpsilon()); - } else if (!Double.isNaN(epsilon)) { - a.setEpsilon(epsilon); - } - - layer.setUpdater(Updater.ADADELTA); - - } else if (u instanceof Nesterovs) { - Nesterovs n = (Nesterovs) u; - if (!Double.isNaN(layer.getMomentum())) { - n.setMomentum(layer.getMomentum()); - } else if (!Double.isNaN(momentum)) { - n.setMomentum(momentum); - } - - if (layer.getMomentumSchedule() != null && !layer.getMomentumSchedule().isEmpty()) { - n.setMomentumSchedule(layer.getMomentumSchedule()); - } else if (momentumSchedule != null && !momentumSchedule.isEmpty()) { - n.setMomentumSchedule(momentumSchedule); - } - layer.setUpdater(Updater.NESTEROVS); - - } else if (u instanceof AdaGrad) { - AdaGrad a = (AdaGrad) u; - if (!Double.isNaN(layer.getEpsilon())) { - a.setEpsilon(layer.getEpsilon()); - } else if (!Double.isNaN(epsilon)) { - a.setEpsilon(epsilon); - } - - layer.setUpdater(Updater.ADAGRAD); - - } else if (u instanceof RmsProp) { - RmsProp r = (RmsProp) u; - - if (!Double.isNaN(layer.getEpsilon())) { - r.setEpsilon(layer.getEpsilon()); - } else if (!Double.isNaN(epsilon)) { - r.setEpsilon(epsilon); - } - if (!Double.isNaN(layer.getRmsDecay())) { - r.setRmsDecay(layer.getRmsDecay()); - } else if (!Double.isNaN(rmsDecay)) { - r.setRmsDecay(rmsDecay); - } - layer.setUpdater(Updater.RMSPROP); - - } else if (u instanceof AdaMax) { - AdaMax a = (AdaMax) u; - - if (!Double.isNaN(layer.getEpsilon())) { - a.setEpsilon(layer.getEpsilon()); - } else if (!Double.isNaN(epsilon)) { - a.setEpsilon(epsilon); - } - - if (!Double.isNaN(layer.getAdamMeanDecay())) { - a.setBeta1(layer.getAdamMeanDecay()); - } else if (!Double.isNaN(adamMeanDecay)) { - a.setBeta1(adamMeanDecay); - } - - if (!Double.isNaN(layer.getAdamVarDecay())) { - a.setBeta2(layer.getAdamVarDecay()); - } else if (!Double.isNaN(adamVarDecay)) { - a.setBeta2(adamVarDecay); - } - layer.setUpdater(Updater.ADAMAX); - - } else if (u instanceof NoOp) { - layer.setUpdater(Updater.NONE); - } else { - //Probably a custom updater - layer.setUpdater(null); - } - - - //Finally: Let's set the legacy momentum, epsilon, rmsDecay fields on the layer - //At this point, it's purely cosmetic, to avoid NaNs etc there that might confuse users - //The *true* values are now in the IUpdater instances - if (layer.getUpdater() != null) { //May be null with custom updaters etc - switch (layer.getUpdater()) { - case NESTEROVS: - if (Double.isNaN(momentum) && Double.isNaN(layer.getMomentum())) { - layer.setMomentum(Nesterovs.DEFAULT_NESTEROV_MOMENTUM); - } else if (Double.isNaN(layer.getMomentum())) - layer.setMomentum(momentum); - if (momentumSchedule != null && layer.getMomentumSchedule() == null) - layer.setMomentumSchedule(momentumSchedule); - else if (momentumSchedule == null && layer.getMomentumSchedule() == null) - layer.setMomentumSchedule(new HashMap()); - break; - case ADAM: - 
if (Double.isNaN(adamMeanDecay) && Double.isNaN(layer.getAdamMeanDecay())) { - layer.setAdamMeanDecay(Adam.DEFAULT_ADAM_BETA1_MEAN_DECAY); - } else if (Double.isNaN(layer.getAdamMeanDecay())) - layer.setAdamMeanDecay(adamMeanDecay); - - if (Double.isNaN(adamVarDecay) && Double.isNaN(layer.getAdamVarDecay())) { - layer.setAdamVarDecay(Adam.DEFAULT_ADAM_BETA2_VAR_DECAY); - } else if (Double.isNaN(layer.getAdamVarDecay())) - layer.setAdamVarDecay(adamVarDecay); - - if (Double.isNaN(epsilon) && Double.isNaN(layer.getEpsilon())) { - layer.setEpsilon(Adam.DEFAULT_ADAM_EPSILON); - } else if (Double.isNaN(layer.getEpsilon())) { - layer.setEpsilon(epsilon); - } - break; - case ADADELTA: - if (Double.isNaN(rho) && Double.isNaN(layer.getRho())) { - layer.setRho(AdaDelta.DEFAULT_ADADELTA_RHO); - } else if (Double.isNaN(layer.getRho())) { - layer.setRho(rho); - } - - if (Double.isNaN(epsilon) && Double.isNaN(layer.getEpsilon())) { - layer.setEpsilon(AdaDelta.DEFAULT_ADADELTA_EPSILON); - } else if (Double.isNaN(layer.getEpsilon())) { - layer.setEpsilon(epsilon); - } - break; - case ADAGRAD: - if (Double.isNaN(epsilon) && Double.isNaN(layer.getEpsilon())) { - layer.setEpsilon(AdaGrad.DEFAULT_ADAGRAD_EPSILON); - } else if (Double.isNaN(layer.getEpsilon())) { - layer.setEpsilon(epsilon); - } - break; - case RMSPROP: - if (Double.isNaN(rmsDecay) && Double.isNaN(layer.getRmsDecay())) { - layer.setRmsDecay(RmsProp.DEFAULT_RMSPROP_RMSDECAY); - } else if (Double.isNaN(layer.getRmsDecay())) - layer.setRmsDecay(rmsDecay); - - if (Double.isNaN(epsilon) && Double.isNaN(layer.getEpsilon())) { - layer.setEpsilon(RmsProp.DEFAULT_RMSPROP_EPSILON); - } else if (Double.isNaN(layer.getEpsilon())) { - layer.setEpsilon(epsilon); - } - break; - case ADAMAX: - if (Double.isNaN(adamMeanDecay) && Double.isNaN(layer.getAdamMeanDecay())) { - layer.setAdamMeanDecay(AdaMax.DEFAULT_ADAMAX_BETA1_MEAN_DECAY); - } else if (Double.isNaN(layer.getAdamMeanDecay())) - layer.setAdamMeanDecay(adamMeanDecay); - - if (Double.isNaN(adamVarDecay) && Double.isNaN(layer.getAdamVarDecay())) { - layer.setAdamVarDecay(AdaMax.DEFAULT_ADAMAX_BETA2_VAR_DECAY); - } else if (Double.isNaN(layer.getAdamVarDecay())) - layer.setAdamVarDecay(adamVarDecay); - - if (Double.isNaN(epsilon) && Double.isNaN(layer.getEpsilon())) { - layer.setEpsilon(AdaMax.DEFAULT_ADAMAX_EPSILON); - } else if (Double.isNaN(layer.getEpsilon())) { - layer.setEpsilon(epsilon); - } - } - } - } - - public static void generalValidation(String layerName, Layer layer, boolean useDropConnect, Double dropOut, + public static void generalValidation(String layerName, Layer layer, IDropout iDropOut, Double l2, Double l2Bias, Double l1, Double l1Bias, Distribution dist, List allParamConstraints, List weightConstraints, List biasConstraints) { - generalValidation(layerName, layer, useDropConnect, dropOut == null ? 0.0 : dropOut, + generalValidation(layerName, layer, iDropOut, l2 == null ? Double.NaN : l2, l2Bias == null ? Double.NaN : l2Bias, l1 == null ? Double.NaN : l1, l1Bias == null ? 
Double.NaN : l1Bias, dist, allParamConstraints, weightConstraints, biasConstraints); } - public static void generalValidation(String layerName, Layer layer, boolean useDropConnect, double dropOut, + public static void generalValidation(String layerName, Layer layer, IDropout iDropout, double l2, double l2Bias, double l1, double l1Bias, Distribution dist, List allParamConstraints, List weightConstraints, List biasConstraints) { if (layer != null) { - - if (useDropConnect && (Double.isNaN(dropOut) && (Double.isNaN(layer.getDropOut())))) - OneTimeLogger.warn(log, "Layer \"" + layerName - + "\" dropConnect is set to true but dropout rate has not been added to configuration."); - if (useDropConnect && layer.getDropOut() == 0.0) - OneTimeLogger.warn(log, - "Layer \"" + layerName + " dropConnect is set to true but dropout rate is set to 0.0"); - if (layer instanceof BaseLayer) { BaseLayer bLayer = (BaseLayer) layer; - configureBaseLayer(layerName, bLayer, useDropConnect, dropOut, l2, l2Bias, l1, - l1Bias, dist); + configureBaseLayer(layerName, bLayer, iDropout, l2, l2Bias, l1, l1Bias, dist); } else if (layer instanceof FrozenLayer && ((FrozenLayer) layer).getLayer() instanceof BaseLayer) { BaseLayer bLayer = (BaseLayer) ((FrozenLayer) layer).getLayer(); - configureBaseLayer(layerName, bLayer, useDropConnect, dropOut, l2, l2Bias, l1, - l1Bias, dist); + configureBaseLayer(layerName, bLayer, iDropout, l2, l2Bias, l1, l1Bias, dist); } if(layer.getConstraints() == null || layer.constraints.isEmpty()) { @@ -347,13 +90,17 @@ public static void generalValidation(String layerName, Layer layer, boolean useD } } - layer.setConstraints(allConstraints); + if(allConstraints.size() > 0){ + layer.setConstraints(allConstraints); + } else { + layer.setConstraints(null); + } } } } - private static void configureBaseLayer(String layerName, BaseLayer bLayer, boolean useDropConnect, - Double dropOut, Double l2, Double l2Bias, Double l1, Double l1Bias, + private static void configureBaseLayer(String layerName, BaseLayer bLayer, IDropout iDropout, Double l2, Double l2Bias, + Double l1, Double l1Bias, Distribution dist) { if (!Double.isNaN(l1) && Double.isNaN(bLayer.getL1())) { @@ -382,6 +129,10 @@ private static void configureBaseLayer(String layerName, BaseLayer bLayer, bool bLayer.setL1Bias(0.0); } + if(bLayer.getIDropout() == null){ + bLayer.setIDropout(iDropout); + } + if (bLayer.getWeightInit() == WeightInit.DISTRIBUTION) { if (dist != null && bLayer.getDist() == null) diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java index c340f4b6e83e..190655b9bfbb 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java @@ -101,12 +101,6 @@ public double getL2ByParam(String paramName) { return 0; } - @Override - public double getLearningRateByParam(String paramName) { - //Not applicable - return 0; - } - @Override public boolean isPretrainParam(String paramName) { return false; //No params in LRN diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java index 4e118d418711..13707aad608d 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java +++ 
b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java @@ -27,7 +27,6 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.util.LayerValidation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RBM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RBM.java index 3eebc2f07669..207a826054cc 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RBM.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RBM.java @@ -104,7 +104,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { int updaterStateSize = (int) getIUpdater().stateSize(numParams); int trainSizePerEx = 0; - if (getDropOut() > 0) { + if (getIDropout() != null) { if (false) { //TODO drop connect //Dup the weights... note that this does NOT depend on the minibatch size... diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java index 9db77892efd9..c8464c5839a9 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java @@ -11,7 +11,6 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.util.LayerValidation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java index 606666d52efb..d00ee2611743 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java @@ -4,11 +4,9 @@ import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; -import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.params.EmptyParamInitializer; import org.deeplearning4j.optimize.api.IterationListener; import org.deeplearning4j.util.ConvolutionUtils; import org.nd4j.linalg.api.ndarray.INDArray; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java index 1e71e60e3354..ff988cbc8ff6 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java @@ -151,12 +151,6 @@ public double getL2ByParam(String paramName) { return 0; } - @Override - public double getLearningRateByParam(String paramName) { - //Not 
applicable - return 0; - } - @Override public boolean isPretrainParam(String paramName) { throw new UnsupportedOperationException("SubsamplingLayer does not contain parameters"); @@ -176,7 +170,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { //Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass int trainingWorkingSizePerEx = im2colSizePerEx; - if (getDropOut() > 0) { + if (getIDropout() != null) { //Dup on the input before dropout, but only for training trainingWorkingSizePerEx += inputType.arrayElementsPerExample(); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java new file mode 100644 index 000000000000..58e884ab7d65 --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java @@ -0,0 +1,140 @@ +/*- + * + * * Copyright 2017 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ +package org.deeplearning4j.nn.conf.layers; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.ToString; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.conf.memory.MemoryReport; +import org.deeplearning4j.nn.params.EmptyParamInitializer; +import org.deeplearning4j.optimize.api.IterationListener; +import org.nd4j.linalg.api.ndarray.INDArray; + +import java.util.Collection; +import java.util.Map; + +/** + * Upsampling 1D layer + * + * @author Max Pumperla + */ + +@Data +@NoArgsConstructor +@ToString(callSuper = true) +@EqualsAndHashCode(callSuper = true) +public class Upsampling1D extends BaseUpsamplingLayer { + + protected int size; + + protected Upsampling1D(UpsamplingBuilder builder) { + super(builder); + this.size = builder.size; + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, + Collection iterationListeners, int layerIndex, INDArray layerParamsView, + boolean initializeParams) { + org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling1D ret = + new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling1D(conf); + ret.setListeners(iterationListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setConf(conf); + return ret; + } + + @Override + public Upsampling1D clone() { + Upsampling1D clone = (Upsampling1D) super.clone(); + return clone; + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { 
+ throw new IllegalStateException("Invalid input for 1D Upsampling layer (layer index = " + layerIndex + + ", layer name = \"" + getLayerName() + "\"): expect RNN input type with size > 0. Got: " + + inputType); + } + InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType; + return InputType.recurrent(recurrent.getSize(), recurrent.getTimeSeriesLength()); + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException("Invalid input for Upsampling layer (layer name=\"" + getLayerName() + + "\"): input is null"); + } + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType; + InputType.InputTypeRecurrent outputType = (InputType.InputTypeRecurrent) getOutputType(-1, inputType); + + int im2colSizePerEx = recurrent.getSize() * outputType.getTimeSeriesLength() * size; + int trainingWorkingSizePerEx = im2colSizePerEx; + if (getIDropout() != null) { + trainingWorkingSizePerEx += inputType.arrayElementsPerExample(); + } + + return new LayerMemoryReport.Builder(layerName, Upsampling1D.class, inputType, outputType) + .standardMemory(0, 0) //No params + .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx) + .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching + .build(); + } + + @NoArgsConstructor + public static class Builder extends UpsamplingBuilder { + + public Builder(int size) { + super(size); + } + + /** + * Upsampling size + * + * @param size upsampling size in height and width dimensions + */ + public Builder size(int size) { + + this.size = size; + return this; + } + + @Override + @SuppressWarnings("unchecked") + public Upsampling1D build() { + return new Upsampling1D(this); + } + } + +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java index 6e355c28b72a..b7d5bf322b21 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java @@ -21,13 +21,11 @@ import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; -import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; -import org.deeplearning4j.nn.params.EmptyParamInitializer; import org.deeplearning4j.optimize.api.IterationListener; import org.nd4j.linalg.api.ndarray.INDArray; @@ -35,7 +33,7 @@ import java.util.Map; /** - * Upsampling layer + * Upsampling 2D layer * * @author Max Pumperla */ @@ -44,11 +42,11 @@ @NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -public class Upsampling2D extends Layer { +public class Upsampling2D extends BaseUpsamplingLayer { protected int size; - protected Upsampling2D(Upsampling2DBuilder builder) { + protected Upsampling2D(UpsamplingBuilder builder) { super(builder); this.size = builder.size; } @@ -74,11 +72,6 @@ public org.deeplearning4j.nn.api.Layer 
instantiate(NeuralNetConfiguration conf, return ret; } - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); - } - @Override public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.CNN) { @@ -93,11 +86,6 @@ public InputType getOutputType(int layerIndex, InputType inputType) { return InputType.convolutional(size * inHeight, size * inWidth, inDepth); } - @Override - public void setNIn(InputType inputType, boolean override) { - //No op: upsampling layer doesn't have nIn value - } - @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { if (inputType == null) { @@ -107,29 +95,6 @@ public InputPreProcessor getPreProcessorForInputType(InputType inputType) { return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); } - @Override - public double getL1ByParam(String paramName) { - //Not applicable - return 0; - } - - @Override - public double getL2ByParam(String paramName) { - //Not applicable - return 0; - } - - @Override - public double getLearningRateByParam(String paramName) { - //Not applicable - return 0; - } - - @Override - public boolean isPretrainParam(String paramName) { - throw new UnsupportedOperationException("UpsamplingLayer does not contain parameters"); - } - @Override public LayerMemoryReport getMemoryReport(InputType inputType) { InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; @@ -140,7 +105,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { // Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass int trainingWorkingSizePerEx = im2colSizePerEx; - if (getDropOut() > 0) { + if (getIDropout() != null) { //Dup on the input before dropout, but only for training trainingWorkingSizePerEx += inputType.arrayElementsPerExample(); } @@ -154,7 +119,7 @@ public LayerMemoryReport getMemoryReport(InputType inputType) { @NoArgsConstructor - public static class Builder extends Upsampling2DBuilder { + public static class Builder extends UpsamplingBuilder { public Builder(int size) { super(size); @@ -171,7 +136,6 @@ public Builder size(int size) { return this; } - @Override @SuppressWarnings("unchecked") public Upsampling2D build() { @@ -179,14 +143,4 @@ public Upsampling2D build() { } } - @NoArgsConstructor - protected static abstract class Upsampling2DBuilder> - extends Layer.Builder { - protected int size = 1; - - protected Upsampling2DBuilder(int size) { - this.size = size; - } - } - } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java index eddfec3b6335..a94f6f700838 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java @@ -108,11 +108,6 @@ public double getL2ByParam(String paramName) { return 0; } - @Override - public double getLearningRateByParam(String paramName) { - return 0; - } - @Override public boolean isPretrainParam(String paramName) { throw new UnsupportedOperationException("ZeroPaddingLayer does not contain parameters"); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java index 
a0ed0bf6f355..47b514c84ea8 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java @@ -106,11 +106,6 @@ public double getL2ByParam(String paramName) { return 0; } - @Override - public double getLearningRateByParam(String paramName) { - return 0; - } - @Override public boolean isPretrainParam(String paramName) { throw new UnsupportedOperationException("ZeroPaddingLayer does not contain parameters"); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java index 56da2b728a53..66abec1e812a 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java @@ -3,13 +3,12 @@ import lombok.EqualsAndHashCode; import lombok.Getter; import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Layer; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; -import org.deeplearning4j.nn.params.EmptyParamInitializer; import org.deeplearning4j.nn.params.FrozenLayerParamInitializer; import org.deeplearning4j.optimize.api.IterationListener; import org.nd4j.linalg.api.ndarray.INDArray; @@ -68,12 +67,10 @@ public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, conf.variables(false).add(s); conf.getL1ByParam().put(s, 0.0); conf.getL2ByParam().put(s, 0.0); - conf.getLearningRateByParam().put(s, 0.0); nncUnderlying.variables(false).add(s); nncUnderlying.getL1ByParam().put(s, 0.0); nncUnderlying.getL2ByParam().put(s, 0.0); - nncUnderlying.getLearningRateByParam().put(s, 0.0); } } @@ -110,23 +107,13 @@ public double getL2ByParam(String paramName) { return 0; } - @Override - public double getLearningRateByParam(String paramName) { - return 0; - } - @Override public boolean isPretrainParam(String paramName) { return false; } @Override - public Updater getUpdaterByParam(String paramName) { - return null; - } - - @Override - public IUpdater getIUpdaterByParam(String paramName) { + public IUpdater getUpdaterByParam(String paramName) { return null; } @@ -141,6 +128,12 @@ public void setLayerName(String layerName) { layer.setLayerName(layerName); } + @Override + public void setConstraints(List constraints){ + this.constraints = constraints; + this.layer.setConstraints(constraints); + } + public static class Builder extends Layer.Builder { private Layer layer; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java index 6dab4bb71b0c..c48fbd83462a 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java @@ -1,7 +1,6 @@ package org.deeplearning4j.nn.conf.layers.objdetect; import lombok.Data; -import lombok.Getter; import org.deeplearning4j.nn.api.Layer; import 
org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -18,7 +17,6 @@ import org.nd4j.shade.jackson.databind.annotation.JsonSerialize; import org.nd4j.shade.serde.jackson.VectorDeSerializer; import org.nd4j.shade.serde.jackson.VectorSerializer; -import org.nd4j.shade.serde.jackson.shaded.NDArraySerializer; import java.util.Arrays; import java.util.Collection; @@ -114,11 +112,6 @@ public double getL2ByParam(String paramName) { return 0; //No params } - @Override - public double getLearningRateByParam(String paramName) { - return 0; //No params - } - @Override public boolean isPretrainParam(String paramName) { return false; //No params diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/GaussianReconstructionDistribution.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/GaussianReconstructionDistribution.java index 581a0e308975..b993a39600cf 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/GaussianReconstructionDistribution.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/GaussianReconstructionDistribution.java @@ -33,15 +33,7 @@ public class GaussianReconstructionDistribution implements ReconstructionDistrib * Create a GaussianReconstructionDistribution with the default identity activation function. */ public GaussianReconstructionDistribution() { - this("identity"); - } - - /** - * @deprecated Use {@link #GaussianReconstructionDistribution(Activation)} - */ - @Deprecated - public GaussianReconstructionDistribution(String activationFn) { - this(Activation.fromString(activationFn).getActivationFunction()); + this(Activation.IDENTITY); } /** diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java index 7344ab531d67..05cc5b5f3036 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java @@ -8,11 +8,11 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.BasePretrainNetwork; +import org.deeplearning4j.nn.conf.layers.LayerValidation; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.VariationalAutoencoderParamInitializer; import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.util.LayerValidation; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationIdentity; @@ -80,20 +80,6 @@ public ParamInitializer initializer() { return VariationalAutoencoderParamInitializer.getInstance(); } - @Override - public double getLearningRateByParam(String paramName) { - if (paramName.endsWith("b")) { - if (!Double.isNaN(biasLearningRate)) { - //Bias learning rate has been explicitly set - return biasLearningRate; - } else { - return learningRate; - } - } else { - return learningRate; - } - } - @Override public double getL1ByParam(String paramName) { if (paramName.endsWith(VariationalAutoencoderParamInitializer.BIAS_KEY_SUFFIX)) @@ -153,7 +139,7 @@ public 
LayerMemoryReport getMemoryReport(InputType inputType) { int trainWorkingMemSize = 2 * (inferenceWorkingMemSizePerEx + decoderFwdSizeWorking); - if (getDropOut() > 0) { + if (getIDropout() != null) { if (false) { //TODO drop connect //Dup the weights... note that this does NOT depend on the minibatch size... @@ -273,15 +259,6 @@ public Builder pzxActivationFn(IActivation activationFunction) { return this; } - - /** - * @deprecated Use {@link #pzxActivationFunction(Activation)} - */ - @Deprecated - public Builder pzxActivationFunction(String activationFunction) { - return pzxActivationFn(Activation.fromString(activationFunction).getActivationFunction()); - } - /** * Activation function for the input to P(z|data).
* Care should be taken with this, as some activation functions (relu, etc) are not suitable due to being diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/BaseInputPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/BaseInputPreProcessor.java index 405e6a12810f..b319e6a3cd16 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/BaseInputPreProcessor.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/BaseInputPreProcessor.java @@ -1,9 +1,9 @@ package org.deeplearning4j.nn.conf.preprocessor; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** * @author Adam Gibson diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java index 8d04982e6b1e..ba672ceab82f 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java @@ -19,12 +19,12 @@ package org.deeplearning4j.nn.conf.preprocessor; import lombok.Data; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.shape.Shape; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.shade.jackson.annotation.JsonCreator; import org.nd4j.shade.jackson.annotation.JsonProperty; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java index 062529c4a460..3b4e96cfbac5 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java @@ -1,15 +1,12 @@ package org.deeplearning4j.nn.conf.preprocessor; -import lombok.AccessLevel; -import lombok.Data; -import lombok.Getter; -import lombok.Setter; -import org.nd4j.linalg.primitives.Pair; +import lombok.*; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.util.TimeSeriesUtils; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.util.ArrayUtil; import org.nd4j.shade.jackson.annotation.JsonCreator; import org.nd4j.shade.jackson.annotation.JsonProperty; @@ -30,6 +27,7 @@ * @author Alex Black */ @Data +@EqualsAndHashCode(exclude = {"product"}) public class CnnToRnnPreProcessor implements InputPreProcessor { private int inputHeight; private int inputWidth; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/ComposableInputPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/ComposableInputPreProcessor.java index 920e145f96c0..ac1d02a3a95f 100644 --- 
a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/ComposableInputPreProcessor.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/ComposableInputPreProcessor.java @@ -20,11 +20,11 @@ import lombok.Data; import lombok.EqualsAndHashCode; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.shade.jackson.annotation.JsonCreator; import org.nd4j.shade.jackson.annotation.JsonProperty; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnnPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnnPreProcessor.java index 3eba7bac42db..72bb972d1843 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnnPreProcessor.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnnPreProcessor.java @@ -18,16 +18,13 @@ package org.deeplearning4j.nn.conf.preprocessor; -import lombok.AccessLevel; -import lombok.Data; -import lombok.Getter; -import lombok.Setter; -import org.nd4j.linalg.primitives.Pair; +import lombok.*; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.shape.Shape; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.util.ArrayUtil; import org.nd4j.shade.jackson.annotation.JsonCreator; import org.nd4j.shade.jackson.annotation.JsonProperty; @@ -50,6 +47,7 @@ * @see CnnToFeedForwardPreProcessor for opposite case (i.e., CNN -> DenseLayer etc) */ @Data +@EqualsAndHashCode(exclude = {"shape"}) public class FeedForwardToCnnPreProcessor implements InputPreProcessor { private int inputHeight; private int inputWidth; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToRnnPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToRnnPreProcessor.java index b98454aeddf0..5facd986ab70 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToRnnPreProcessor.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToRnnPreProcessor.java @@ -2,13 +2,13 @@ import lombok.Data; import lombok.NoArgsConstructor; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.util.TimeSeriesUtils; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.shape.Shape; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToCnnPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToCnnPreProcessor.java index cf3b011414e6..c3ea5b17d0be 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToCnnPreProcessor.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToCnnPreProcessor.java @@ -1,15 +1,12 @@ package 
org.deeplearning4j.nn.conf.preprocessor; -import lombok.AccessLevel; -import lombok.Data; -import lombok.Getter; -import lombok.Setter; -import org.nd4j.linalg.primitives.Pair; +import lombok.*; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.util.TimeSeriesUtils; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.util.ArrayUtil; import org.nd4j.shade.jackson.annotation.JsonProperty; @@ -30,6 +27,7 @@ * @author Alex Black */ @Data +@EqualsAndHashCode(exclude = {"product"}) public class RnnToCnnPreProcessor implements InputPreProcessor { private int inputHeight; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToFeedForwardPreProcessor.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToFeedForwardPreProcessor.java index 10bc837dd8cd..8ee0ea7120a4 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToFeedForwardPreProcessor.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToFeedForwardPreProcessor.java @@ -2,13 +2,13 @@ import lombok.Data; import lombok.extern.slf4j.Slf4j; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.util.TimeSeriesUtils; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.shape.Shape; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java index dfad27d8a709..c3e3672fd50a 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java @@ -11,9 +11,9 @@ import org.nd4j.shade.jackson.databind.JsonMappingException; import org.nd4j.shade.jackson.databind.deser.ResolvableDeserializer; import org.nd4j.shade.jackson.databind.deser.std.StdDeserializer; +import org.nd4j.shade.jackson.databind.node.ObjectNode; import java.io.IOException; -import java.util.Map; /** * A custom (abstract) deserializer that handles backward compatibility (currently only for updater refactoring that @@ -38,56 +38,104 @@ public BaseNetConfigDeserializer(JsonDeserializer defaultDeserializer, Class< public abstract T deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, JsonProcessingException; - - protected void handleUpdaterBackwardCompatibility(Layer[] layers) { - //Updater configuration changed after 0.8.0 release - //Previously: enumerations and a bunch of fields. 
Now: classes - //Here, we manually create the appropriate Updater instances, if the iupdater field is empty - for (int i = 0; i < layers.length; i++) { - Layer l = layers[i]; - if (l == null || !(l instanceof BaseLayer) || ((BaseLayer) l).getIUpdater() != null) { - //OK - no need to manually handle IUpdater instances for this layer - continue; + protected boolean requiresIUpdaterFromLegacy(Layer[] layers){ + for(Layer l : layers){ + if(l instanceof BaseLayer){ + BaseLayer bl = (BaseLayer)l; + if(bl.getIUpdater() == null && bl.initializer().numParams(bl) > 0){ + return true; + } } + } + return false; + } - BaseLayer bl = (BaseLayer) l; + protected boolean requiresDropoutFromLegacy(Layer[] layers){ + for(Layer l : layers){ + if(l.getIDropout() != null){ + return false; + } + } + return true; + } - Updater u = bl.getUpdater(); - double lr = bl.getLearningRate(); - double eps = bl.getEpsilon(); - double rho = bl.getRho(); + protected void handleUpdaterBackwardCompatibility(BaseLayer layer, ObjectNode on){ + if(on != null && on.has("updater")){ + String updaterName = on.get("updater").asText(); + if(updaterName != null){ + Updater u = Updater.valueOf(updaterName); + IUpdater iu = u.getIUpdaterWithDefaultConfig(); + double lr = on.get("learningRate").asDouble(); + double eps; + if(on.has("epsilon")){ + eps = on.get("epsilon").asDouble(); + } else { + eps = Double.NaN; + } + double rho = on.get("rho").asDouble(); + switch (u){ + case SGD: + ((Sgd)iu).setLearningRate(lr); + break; + case ADAM: + if(Double.isNaN(eps)){ + eps = Adam.DEFAULT_ADAM_EPSILON; + } + ((Adam)iu).setLearningRate(lr); + ((Adam)iu).setBeta1(on.get("adamMeanDecay").asDouble()); + ((Adam)iu).setBeta2(on.get("adamVarDecay").asDouble()); + ((Adam)iu).setEpsilon(eps); + break; + case ADAMAX: + if(Double.isNaN(eps)){ + eps = AdaMax.DEFAULT_ADAMAX_EPSILON; + } + ((AdaMax)iu).setLearningRate(lr); + ((AdaMax)iu).setBeta1(on.get("adamMeanDecay").asDouble()); + ((AdaMax)iu).setBeta2(on.get("adamVarDecay").asDouble()); + ((AdaMax)iu).setEpsilon(eps); + break; + case ADADELTA: + if(Double.isNaN(eps)){ + eps = AdaDelta.DEFAULT_ADADELTA_EPSILON; + } + ((AdaDelta)iu).setRho(rho); + ((AdaDelta)iu).setEpsilon(eps); + break; + case NESTEROVS: + ((Nesterovs)iu).setLearningRate(lr); + ((Nesterovs)iu).setMomentum(on.get("momentum").asDouble()); + break; + case NADAM: + if(Double.isNaN(eps)){ + eps = Nadam.DEFAULT_NADAM_EPSILON; + } + ((Nadam)iu).setLearningRate(lr); + ((Nadam)iu).setBeta1(on.get("adamMeanDecay").asDouble()); + ((Nadam)iu).setBeta2(on.get("adamVarDecay").asDouble()); + ((Nadam)iu).setEpsilon(eps); + break; + case ADAGRAD: + if(Double.isNaN(eps)){ + eps = AdaGrad.DEFAULT_ADAGRAD_EPSILON; + } + ((AdaGrad)iu).setLearningRate(lr); + ((AdaGrad)iu).setEpsilon(eps); + break; + case RMSPROP: + if(Double.isNaN(eps)){ + eps = RmsProp.DEFAULT_RMSPROP_EPSILON; + } + ((RmsProp)iu).setLearningRate(lr); + ((RmsProp)iu).setEpsilon(eps); + ((RmsProp)iu).setRmsDecay(on.get("rmsDecay").asDouble()); + break; + default: + //No op + break; + } - switch (u) { - case SGD: - bl.setIUpdater(new Sgd(lr)); - break; - case ADAM: - double meanDecay = bl.getAdamMeanDecay(); - double varDecay = bl.getAdamVarDecay(); - bl.setIUpdater(Adam.builder().learningRate(lr).beta1(meanDecay).beta2(varDecay).epsilon(eps) - .build()); - break; - case ADADELTA: - bl.setIUpdater(new AdaDelta(rho, eps)); - break; - case NESTEROVS: - Map momentumSchedule = bl.getMomentumSchedule(); - double momentum = bl.getMomentum(); - bl.setIUpdater(new Nesterovs(lr, momentum, 
momentumSchedule)); - break; - case ADAGRAD: - bl.setIUpdater(new AdaGrad(lr, eps)); - break; - case RMSPROP: - double rmsDecay = bl.getRmsDecay(); - bl.setIUpdater(new RmsProp(lr, rmsDecay, eps)); - break; - case NONE: - bl.setIUpdater(new NoOp()); - break; - case CUSTOM: - //No op - shouldn't happen - break; + layer.setIUpdater(iu); } } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java index 8f094d64e029..26efa9f72091 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java @@ -1,16 +1,24 @@ package org.deeplearning4j.nn.conf.serde; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.graph.LayerVertex; +import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.weightnoise.DropConnect; +import org.nd4j.shade.jackson.core.JsonLocation; import org.nd4j.shade.jackson.core.JsonParser; -import org.nd4j.shade.jackson.core.JsonProcessingException; import org.nd4j.shade.jackson.databind.DeserializationContext; import org.nd4j.shade.jackson.databind.JsonDeserializer; +import org.nd4j.shade.jackson.databind.JsonNode; +import org.nd4j.shade.jackson.databind.ObjectMapper; +import org.nd4j.shade.jackson.databind.node.ObjectNode; import java.io.IOException; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import java.util.Map; @@ -23,10 +31,11 @@ public ComputationGraphConfigurationDeserializer(JsonDeserializer defaultDese } @Override - public ComputationGraphConfiguration deserialize(JsonParser jp, DeserializationContext ctxt) - throws IOException, JsonProcessingException { + public ComputationGraphConfiguration deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException { + long charOffsetStart = jp.getCurrentLocation().getCharOffset(); ComputationGraphConfiguration conf = (ComputationGraphConfiguration) defaultDeserializer.deserialize(jp, ctxt); + //Updater configuration changed after 0.8.0 release //Previously: enumerations and fields. 
Now: classes //Here, we manually create the appropriate Updater instances, if the IUpdater field is empty @@ -41,7 +50,56 @@ public ComputationGraphConfiguration deserialize(JsonParser jp, DeserializationC } Layer[] layers = layerList.toArray(new Layer[layerList.size()]); - handleUpdaterBackwardCompatibility(layers); + //Now, check if we need to manually handle IUpdater deserialization from legacy format + boolean attemptIUpdaterFromLegacy = requiresIUpdaterFromLegacy(layers); + + if(attemptIUpdaterFromLegacy) { + JsonLocation endLocation = jp.getCurrentLocation(); + long charOffsetEnd = endLocation.getCharOffset(); + String jsonSubString = endLocation.getSourceRef().toString().substring((int) charOffsetStart - 1, (int) charOffsetEnd); + + ObjectMapper om = NeuralNetConfiguration.mapper(); + JsonNode rootNode = om.readTree(jsonSubString); + + ObjectNode verticesNode = (ObjectNode) rootNode.get("vertices"); + Iterator iter = verticesNode.elements(); + int layerIdx = 0; + while(iter.hasNext()){ + JsonNode next = iter.next(); + ObjectNode confNode = null; + if(next.has("LayerVertex")){ + next = next.get("LayerVertex"); + if(next.has("layerConf")){ + confNode = (ObjectNode) next.get("layerConf"); + next = confNode.get("layer").elements().next(); + } else { + continue; + } + + if(layers[layerIdx] instanceof BaseLayer && ((BaseLayer)layers[layerIdx]).getIUpdater() == null){ + handleUpdaterBackwardCompatibility((BaseLayer)layers[layerIdx], (ObjectNode)next); + } + + if(layers[layerIdx].getIDropout() == null){ + //Check for legacy dropout + if(next.has("dropOut")){ + double d = next.get("dropOut").asDouble(); + if(!Double.isNaN(d)){ + //Might be dropout or dropconnect... + if(layers[layerIdx] instanceof BaseLayer && confNode.has("useDropConnect") + && confNode.get("useDropConnect").asBoolean(false)){ + ((BaseLayer)layers[layerIdx]).setWeightNoise(new DropConnect(d)); + } else { + layers[layerIdx].setIDropout(new Dropout(d)); + } + } + } + } + + layerIdx++; + } + } + } return conf; } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/MultiLayerConfigurationDeserializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/MultiLayerConfigurationDeserializer.java index 6ec48654580e..e7e01ae4980b 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/MultiLayerConfigurationDeserializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/MultiLayerConfigurationDeserializer.java @@ -1,11 +1,19 @@ package org.deeplearning4j.nn.conf.serde; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.dropout.Dropout; +import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.weightnoise.DropConnect; +import org.nd4j.shade.jackson.core.JsonLocation; import org.nd4j.shade.jackson.core.JsonParser; -import org.nd4j.shade.jackson.core.JsonProcessingException; import org.nd4j.shade.jackson.databind.DeserializationContext; import org.nd4j.shade.jackson.databind.JsonDeserializer; +import org.nd4j.shade.jackson.databind.JsonNode; +import org.nd4j.shade.jackson.databind.ObjectMapper; +import org.nd4j.shade.jackson.databind.node.ArrayNode; +import org.nd4j.shade.jackson.databind.node.ObjectNode; import java.io.IOException; @@ -16,20 +24,63 @@ public MultiLayerConfigurationDeserializer(JsonDeserializer defaultDeserializ } @Override - public 
MultiLayerConfiguration deserialize(JsonParser jp, DeserializationContext ctxt) - throws IOException, JsonProcessingException { - MultiLayerConfiguration conf = (MultiLayerConfiguration) defaultDeserializer.deserialize(jp, ctxt); - - //Updater configuration changed after 0.8.0 release - //Previously: enumerations and fields. Now: classes - //Here, we manually create the appropriate Updater instances, if the IUpdater field is empty + public MultiLayerConfiguration deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException { + long charOffsetStart = jp.getCurrentLocation().getCharOffset(); + MultiLayerConfiguration conf = (MultiLayerConfiguration) defaultDeserializer.deserialize(jp, ctxt); Layer[] layers = new Layer[conf.getConfs().size()]; for (int i = 0; i < layers.length; i++) { layers[i] = conf.getConf(i).getLayer(); } - handleUpdaterBackwardCompatibility(layers); + //Now, check if we need to manually handle IUpdater deserialization from legacy format + boolean attemptIUpdaterFromLegacy = requiresIUpdaterFromLegacy(layers); + + + if(attemptIUpdaterFromLegacy) { + JsonLocation endLocation = jp.getCurrentLocation(); + long charOffsetEnd = endLocation.getCharOffset(); + String jsonSubString = endLocation.getSourceRef().toString().substring((int) charOffsetStart - 1, (int) charOffsetEnd); + + ObjectMapper om = NeuralNetConfiguration.mapper(); + JsonNode rootNode = om.readTree(jsonSubString); + + ArrayNode confsNode = (ArrayNode)rootNode.get("confs"); + + for( int i=0; i (first/only child) -> updater + if(on.has("layer")){ + confNode = on; + on = (ObjectNode) on.get("layer"); + } else { + continue; + } + on = (ObjectNode) on.elements().next(); + + handleUpdaterBackwardCompatibility((BaseLayer)layers[i], on); + } + + if(layers[i].getIDropout() == null){ + //Check for legacy dropout/dropconnect + if(on.has("dropOut")){ + double d = on.get("dropOut").asDouble(); + if(!Double.isNaN(d)){ + //Might be dropout or dropconnect... + if(confNode != null && layers[i] instanceof BaseLayer && confNode.has("useDropConnect") + && confNode.get("useDropConnect").asBoolean(false)){ + ((BaseLayer)layers[i]).setWeightNoise(new DropConnect(d)); + } else { + layers[i].setIDropout(new Dropout(d)); + } + } + } + } + } + } + return conf; } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java new file mode 100644 index 000000000000..876ed509453c --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java @@ -0,0 +1,87 @@ +package org.deeplearning4j.nn.conf.weightnoise; + +import lombok.Data; +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.random.impl.DropOut; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.schedule.ISchedule; +import org.nd4j.shade.jackson.annotation.JsonProperty; + +/** + * DropConnect, based on Wan et. al 2013 - "Regularization of Neural Networks using DropConnect"
+ * Sets weights randomly to 0 with some probability, or leaves them unchanged. + * + * @author Alex Black + */ +@Data +public class DropConnect implements IWeightNoise { + + private double weightRetainProb; + private ISchedule weightRetainProbSchedule; + private boolean applyToBiases; + + /** + * @param weightRetainProbability Probability of retaining a weight + */ + public DropConnect(double weightRetainProbability) { + this(weightRetainProbability, false); + } + + /** + * @param weightRetainProbability Probability of retaining a weight + * @param applyToBiases If true: apply to biases (default: weights only) + */ + public DropConnect(double weightRetainProbability, boolean applyToBiases) { + this(weightRetainProbability, null, applyToBiases); + } + + /** + * @param weightRetainProbSchedule Probability (schedule) of retaining a weight + */ + public DropConnect(ISchedule weightRetainProbSchedule){ + this(Double.NaN, weightRetainProbSchedule, false); + } + + /** + * @param weightRetainProbSchedule Probability (schedule) of retaining a weight + * @param applyToBiases If true: apply to biases (default: weights only) + */ + public DropConnect(ISchedule weightRetainProbSchedule, boolean applyToBiases){ + this(Double.NaN, weightRetainProbSchedule, applyToBiases); + } + + private DropConnect(@JsonProperty("weightRetainProbability") double weightRetainProbability, + @JsonProperty("weightRetainProbSchedule") ISchedule weightRetainProbSchedule, + @JsonProperty("applyToBiases") boolean applyToBiases) { + this.weightRetainProb = weightRetainProbability; + this.weightRetainProbSchedule = weightRetainProbSchedule; + this.applyToBiases = applyToBiases; + } + + @Override + public INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train) { + ParamInitializer init = layer.conf().getLayer().initializer(); + INDArray param = layer.getParam(paramKey); + + double p; + if(weightRetainProbSchedule == null){ + p = weightRetainProb; + } else { + p = weightRetainProbSchedule.valueAt(iteration, epoch); + } + + if (train && init.isWeightParam(paramKey) || (applyToBiases && init.isBiasParam(paramKey))) { + INDArray out = Nd4j.createUninitialized(param.shape(), param.ordering()); + Nd4j.getExecutioner().exec(new DropOut(param, out, p)); + return out; + } + return param; + } + + @Override + public DropConnect clone() { + return new DropConnect(weightRetainProb, weightRetainProbSchedule, applyToBiases); + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java new file mode 100644 index 000000000000..fd8c7dc83dfd --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java @@ -0,0 +1,36 @@ +package org.deeplearning4j.nn.conf.weightnoise; + +import org.deeplearning4j.nn.api.Layer; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.shade.jackson.annotation.JsonTypeInfo; + +import java.io.Serializable; + +/** + * IWeightNoise instances operate on an weight array(s), modifying values at training time or test + * time, before they are used. Note that the weights are copied before being modified - the original parameters + * are not changed. However, if the pameters are not changed, the original array is returned. + * + * This interface can be used to implement functionality like DropConnect, weight quantization and weight + * noise. 
+ * + * @author Alex Black + */ +@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") +public interface IWeightNoise extends Serializable, Cloneable{ + + /** + * Get the parameter, after applying weight noise + * + * @param layer Layer to get the parameter for + * @param paramKey Parameter key + * @param iteration Iteration number + * @param epoch Epoch number + * @param train If true: training. False: at test time + * @return Parameter, after applying weight noise + */ + INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train); + + IWeightNoise clone(); + +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java new file mode 100644 index 000000000000..a1d66a79e10f --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java @@ -0,0 +1,83 @@ +package org.deeplearning4j.nn.conf.weightnoise; + +import lombok.Data; +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.distribution.Distribution; +import org.deeplearning4j.nn.conf.distribution.Distributions; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.impl.transforms.arithmetic.AddOp; +import org.nd4j.linalg.api.ops.impl.transforms.arithmetic.MulOp; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.shade.jackson.annotation.JsonProperty; + +/** + * Apply noise of the specified distribution to the weights at training time. + * Note that both additive and multiplicative modes are supported - when additive, noise should be mean 0, + * when multiplicative, noise should be mean 1. + * That is, additive noise: x = x + noise
+ * multiplicative noise: x = x * noise + * + * @author Alex Black + */ +@Data +public class WeightNoise implements IWeightNoise { + + private Distribution distribution; + private boolean applyToBias; + private boolean additive; + + /** + * @param distribution Distribution for additive noise + */ + public WeightNoise(Distribution distribution) { + this(distribution, false, true); + } + + /** + * @param distribution Distribution for noise + * @param additive If true: noise is added to weights. If false: noise is multiplied by weights + */ + public WeightNoise(Distribution distribution, boolean additive) { + this(distribution, false, additive); + } + + /** + * @param distribution Distribution for noise + * @param applyToBias If true: apply to biases also. If false (default): apply only to weights + * @param additive If true: noise is added to weights. If false: noise is multiplied by weights + */ + public WeightNoise(@JsonProperty("distribution") Distribution distribution, + @JsonProperty("applyToBias") boolean applyToBias, + @JsonProperty("additive") boolean additive) { + this.distribution = distribution; + this.applyToBias = applyToBias; + this.additive = additive; + } + + @Override + public INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train) { + + ParamInitializer init = layer.conf().getLayer().initializer(); + INDArray param = layer.getParam(paramKey); + if (train && init.isWeightParam(paramKey) || (applyToBias && init.isBiasParam(paramKey))) { + + org.nd4j.linalg.api.rng.distribution.Distribution dist = Distributions.createDistribution(distribution); + INDArray noise = dist.sample(param.shape()); + INDArray out = Nd4j.createUninitialized(param.shape(), param.ordering()); + + if (additive) { + Nd4j.getExecutioner().exec(new AddOp(param, noise, out)); + } else { + Nd4j.getExecutioner().exec(new MulOp(param, noise, out)); + } + return out; + } + return param; + } + + @Override + public WeightNoise clone() { + return new WeightNoise(distribution, applyToBias, additive); + } +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java index d2c315080abc..c41498700ee5 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java @@ -20,6 +20,7 @@ import lombok.Getter; import lombok.Setter; +import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.deeplearning4j.datasets.iterator.AsyncDataSetIterator; @@ -72,8 +73,6 @@ import org.nd4j.linalg.memory.abstracts.DummyWorkspace; import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.primitives.Triple; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.Serializable; import java.util.*; @@ -84,10 +83,9 @@ * * @author Alex Black */ +@Slf4j public class ComputationGraph implements Serializable, Model, NeuralNetwork { - private static final Logger log = LoggerFactory.getLogger(ComputationGraph.class); - protected ComputationGraphConfiguration configuration; protected boolean initCalled = false; protected transient Solver solver; //Used to call optimizers during backprop @@ -598,6 +596,7 @@ public void init(INDArray parameters, boolean cloneParametersArray) { } } + synchronizeIterEpochCounts(); initCalled = true; } @@ -881,6 +880,10 @@ public void fit(DataSetIterator 
iterator) { } else dataSetIterator = iterator; + if(!iterator.hasNext() && iterator.resetSupported()){ + iterator.reset(); + } + if (trainingListeners.size() > 0) { for (TrainingListener tl : trainingListeners) { tl.onEpochStart(this); @@ -1293,6 +1296,7 @@ public int[] topologicalSortOrder() { @Override public void computeGradientAndScore() { + synchronizeIterEpochCounts(); //Calculate activations (which are stored in each layer, and used in backprop) if (configuration.getBackpropType() == BackpropType.TruncatedBPTT) { Map activations = rnnActivateUsingStoredState(inputs, true, true); @@ -1339,6 +1343,11 @@ public void computeGradientAndScore() { } } } + + //Clear the fields (inc. post noise/dropconnect parameters) on the output layers + for( int i=0; i Type of the IEvaluation instance * @return The input IEvaluation instance, after performing evaluation on the test data */ @@ -3177,7 +3181,6 @@ public String summary(InputType... inputTypes) { String in = "-"; String out = "-"; String paramShape = "-"; - if (currentVertex.isInputVertex()) { if (inputTypes != null) vertexOutputs.put(currentVertexName, inputTypes[configuration.getNetworkInputs().indexOf(currentVertexName)]); //for input vertices the outputs are just the input types (only layer vertices have preprocessing?) } else { @@ -3193,7 +3196,7 @@ public String summary(InputType... inputTypes) { paramShape = ""; in = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNIn()); out = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNOut()); - Set paraNames = currentLayer.conf().getLearningRateByParam().keySet(); + List paraNames = currentLayer.conf().variables(); for (String aP : paraNames) { String paramS = ArrayUtils.toString(currentLayer.paramTable().get(aP).shape()); paramShape += aP + ":" + paramS + ", "; @@ -3288,6 +3291,16 @@ public void incrementEpochCount(){ configuration.setEpochCount(configuration.getEpochCount() + 1); } + protected void synchronizeIterEpochCounts(){ + //TODO: this is necessrry for some schedules - but the redundant values are a little ugly... + int currIter = getConfiguration().getIterationCount(); + int currEpoch = getConfiguration().getEpochCount(); + for(Layer l : layers){ + l.setIterationCount(currIter); + l.setEpochCount(currEpoch); + } + } + /** * Indicates whether some other object is "equal to" this one. *
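Note on the new weightnoise package introduced above: DropConnect and WeightNoise both implement IWeightNoise.getParameter(...), which the framework invokes per parameter at training time to return a (possibly noised) copy of the weights, leaving the stored parameters untouched. A minimal usage sketch follows. It assumes the layer builders expose a weightNoise(IWeightNoise) setter as part of this change (that setter is not visible in this diff and is an assumption); the DropConnect, WeightNoise and NormalDistribution constructors are taken directly from the code above.

import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.weightnoise.DropConnect;
import org.deeplearning4j.nn.conf.weightnoise.WeightNoise;

public class WeightNoiseSketch {
    public static void main(String[] args) {
        // DropConnect: at training time each weight is retained with probability 0.5, otherwise set to 0
        DenseLayer dropConnectLayer = new DenseLayer.Builder()
                .nIn(784).nOut(256)
                .weightNoise(new DropConnect(0.5))   // weightNoise(...) builder method is assumed, not shown in this diff
                .build();

        // Additive, mean-0 Gaussian weight noise (second constructor arg additive = true);
        // for multiplicative noise the distribution should instead have mean 1
        DenseLayer additiveNoiseLayer = new DenseLayer.Builder()
                .nIn(256).nOut(10)
                .weightNoise(new WeightNoise(new NormalDistribution(0, 0.01), true))
                .build();
    }
}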

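The synchronizeIterEpochCounts() addition above pushes the configuration's iteration and epoch counters down to every layer, so that schedule-based values (for example DropConnect's weightRetainProbSchedule, or schedule-based updater settings) are evaluated against consistent counters. A small sketch of the ISchedule contract those counters feed into; MapSchedule and ScheduleType are assumed to be available in ND4J's org.nd4j.linalg.schedule package alongside ISchedule, which this diff already imports.

import org.nd4j.linalg.schedule.ISchedule;
import org.nd4j.linalg.schedule.MapSchedule;
import org.nd4j.linalg.schedule.ScheduleType;

public class ScheduleSketch {
    public static void main(String[] args) {
        // The returned value depends on the epoch counter - which is why layer counts must stay in sync
        ISchedule retainProb = new MapSchedule.Builder(ScheduleType.EPOCH)
                .add(0, 0.95)    // epochs 0-9: retain 95% of weights
                .add(10, 0.80)   // epoch 10 onwards: retain 80%
                .build();

        System.out.println(retainProb.valueAt(0, 0));      // 0.95
        System.out.println(retainProb.valueAt(5000, 12));  // 0.80
    }
}

A DropConnect constructed as new DropConnect(retainProb) would then pick up these values via valueAt(iteration, epoch) at training time, as shown in the getParameter(...) implementation above.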
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java index 15ec394888f2..e5e1234399b9 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java @@ -18,11 +18,11 @@ package org.deeplearning4j.nn.graph.vertex; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.io.Serializable; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ElementWiseVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ElementWiseVertex.java index 67d1fd572660..7d8def6fcf66 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ElementWiseVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ElementWiseVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -28,6 +27,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.transforms.Or; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; /** An ElementWiseVertex is used to combine the activations of two or more layer in an element-wise manner
* For example, the activations may be combined by addition, subtraction or multiplication. diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/InputVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/InputVertex.java index 4796c96546a9..b08e04020f43 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/InputVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/InputVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -26,6 +25,7 @@ import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** An InputVertex simply defines the location (and connection structure) of inputs to the ComputationGraph. * It does not define forward or backward methods. diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2NormalizeVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2NormalizeVertex.java index 63c5ccef577c..fdc2e3f0ef79 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2NormalizeVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2NormalizeVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -30,6 +29,7 @@ import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; /** * L2NormalizeVertex performs L2 normalization on a single input. diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2Vertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2Vertex.java index d260136807bf..79fc7d405e91 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2Vertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2Vertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -30,6 +29,7 @@ import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; /** * L2Vertex calculates the L2 least squares error of two inputs. 
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java index 668368f5ec2b..da12c499d8c8 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java @@ -20,7 +20,6 @@ import lombok.Data; import lombok.EqualsAndHashCode; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.api.layers.IOutputLayer; @@ -33,6 +32,7 @@ import org.deeplearning4j.nn.layers.BaseOutputLayer; import org.deeplearning4j.nn.layers.FrozenLayer; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/MergeVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/MergeVertex.java index 571b18f2d891..e1d3aff9123f 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/MergeVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/MergeVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -29,6 +28,7 @@ import org.nd4j.linalg.api.ops.impl.transforms.Or; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PoolHelperVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PoolHelperVertex.java index 7a3f95b11f40..cd818461ce28 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PoolHelperVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PoolHelperVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -29,6 +28,7 @@ import org.nd4j.linalg.api.ops.impl.transforms.Or; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; /** * A custom layer for removing the first column and row from an input. 
This is meant to allow diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PreprocessorVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PreprocessorVertex.java index 692342111621..25f821c01c00 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PreprocessorVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PreprocessorVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -27,6 +26,7 @@ import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** PreprocessorVertex is a simple adaptor class that allows a {@link InputPreProcessor} to be used in a ComputationGraph * GraphVertex, without it being associated with a layer. diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ReshapeVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ReshapeVertex.java index a3c75160e87e..fb6f346f9f09 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ReshapeVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ReshapeVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -26,8 +25,7 @@ import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.impl.transforms.Or; -import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; /** * Adds the ability to reshape and flatten the tensor in the computation graph. This is the equivalent diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ScaleVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ScaleVertex.java index afc495365d37..b4ae622a8eee 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ScaleVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ScaleVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -26,6 +25,7 @@ import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** * A ScaleVertex is used to scale the size of activations of a single layer
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ShiftVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ShiftVertex.java index 0addc6bff2d7..1ad40b3e2bf0 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ShiftVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ShiftVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -26,6 +25,7 @@ import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** * A ShiftVertex is used to shift the activations of a single layer
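Note: the Scale/Shift vertices whose imports are reordered above are normally wired into a graph through their configuration-side counterparts (org.deeplearning4j.nn.conf.graph.ScaleVertex / ShiftVertex). A minimal sketch, not part of this change set; layer names, sizes and the scale/shift constants are illustrative only:

import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.graph.ScaleVertex;
import org.deeplearning4j.nn.conf.graph.ShiftVertex;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class ScaleShiftVertexSketch {
    public static ComputationGraphConfiguration build() {
        return new NeuralNetConfiguration.Builder()
                .graphBuilder()
                .addInputs("in")
                .addLayer("dense", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
                // ScaleVertex: multiply every activation of "dense" by a fixed constant
                .addVertex("scaled", new ScaleVertex(0.5), "dense")
                // ShiftVertex: add a fixed constant to every activation of "scaled"
                .addVertex("shifted", new ShiftVertex(1.0), "scaled")
                .addLayer("out", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .nIn(10).nOut(3).build(), "shifted")
                .setOutputs("out")
                .build();
    }
}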
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/StackVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/StackVertex.java index 2e506203aabb..526686fe100d 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/StackVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/StackVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -29,6 +28,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; /** * StackVertex allows for stacking of inputs so that they may be forwarded through diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/SubsetVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/SubsetVertex.java index 4d8a45297655..d49953f0ac76 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/SubsetVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/SubsetVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -29,6 +28,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/UnstackVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/UnstackVertex.java index 09c63ad7e4b4..f203ae2462a4 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/UnstackVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/UnstackVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -29,6 +28,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java index c1b1bd0a4c6c..783f8c8299b4 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl.rnn; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -29,6 +28,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; 
import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; /**DuplicateToTimeSeriesVertex is a vertex that goes from 2d activations to a 3d time series activations, by means of * duplication. That is, given a 2d input with shape [numExamples,nIn] duplicate each row to give output of diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java index 75c23f2cd0c1..856b631c55ef 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.graph.vertex.impl.rnn; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; @@ -29,6 +28,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; /** LastTimeStepVertex is used in the context of recurrent neural network activations, to go from 3d (time series) * activations to 2d activations, by extracting out the last time step of activations for each example.
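Note: the two recurrent vertices touched above are easiest to follow from the configuration side. A minimal sketch of the usual pattern (sequence -> LSTM -> last time step -> dense -> duplicated back across time), assuming the conf-side classes in org.deeplearning4j.nn.conf.graph.rnn; names and sizes are illustrative only:

import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.graph.rnn.DuplicateToTimeSeriesVertex;
import org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class RnnVertexSketch {
    public static ComputationGraphConfiguration build() {
        return new NeuralNetConfiguration.Builder()
                .graphBuilder()
                .addInputs("seqIn")  // 3d activations: [numExamples, nIn, timeSeriesLength]
                .addLayer("lstm", new GravesLSTM.Builder().nIn(5).nOut(8).build(), "seqIn")
                // 3d -> 2d: keep only the last (mask-aware) time step of each example
                .addVertex("lastStep", new LastTimeStepVertex("seqIn"), "lstm")
                .addLayer("dense", new DenseLayer.Builder().nIn(8).nOut(8).build(), "lastStep")
                // 2d -> 3d: duplicate each row along the time axis taken from "seqIn"
                .addVertex("dup", new DuplicateToTimeSeriesVertex("seqIn"), "dense")
                .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .nIn(8).nOut(3).build(), "dup")
                .setOutputs("out")
                .build();
    }
}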
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java index f4bd6a61bf4d..7a33fd96e11a 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java @@ -20,20 +20,19 @@ import lombok.Data; import lombok.NoArgsConstructor; -import org.deeplearning4j.nn.api.layers.LayerConstraint; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.util.Dropout; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.util.*; @@ -54,6 +53,9 @@ public abstract class AbstractLayer 0 && !conf.isUseDropConnect() && training && !dropoutApplied) { + protected void applyDropOutIfNecessary(boolean training){//} int iteration, int epoch) { + if(training && !dropoutApplied && layerConf().getIDropout() != null ){ + //TODO: Epoch + iteration counters... if (Nd4j.getWorkspaceManager().checkIfWorkspaceExists(ComputationGraph.workspaceExternal)) { try (MemoryWorkspace ws = Nd4j.getWorkspaceManager() - .getWorkspaceForCurrentThread(ComputationGraph.workspaceExternal) - .notifyScopeBorrowed()) { - input = input.isView() ? input.dup() : input.unsafeDuplication(); + .getWorkspaceForCurrentThread(ComputationGraph.workspaceExternal) + .notifyScopeBorrowed()) { + input = layerConf().getIDropout().applyDropout(input, getIterationCount(), getEpochCount(), false); } - } else - input = input.isView() ? 
input.dup() : input.unsafeDuplication(); - - Dropout.applyDropout(input, layerConf().getDropOut()); + } else { + input = layerConf().getIDropout().applyDropout(input, getIterationCount(), getEpochCount(), false); + } dropoutApplied = true; } } - /** - * Averages the given logistic regression from a mini batch into this layer - * @param l the logistic regression layer to average into this layer - * @param batchSize the batch size - */ - @Override - public void merge(Layer l, int batchSize) { - throw new UnsupportedOperationException(); - } - @Override public Type type() { return Type.FEED_FORWARD; @@ -435,11 +411,6 @@ public int getInputMiniBatchSize() { return input.size(0); } - @Override - public void applyLearningRateScoreDecay() { - throw new UnsupportedOperationException("Not supported"); - } - @Override public void setMaskArray(INDArray maskArray) { this.maskArray = maskArray; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java index d4fb2fe6b60f..6fbf9eff051e 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java @@ -19,12 +19,12 @@ package org.deeplearning4j.nn.layers; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** @@ -109,20 +109,9 @@ public boolean isPretrainLayer() { return false; } - - @Override - public Gradient calcGradient(Gradient layerError, INDArray indArray) { - throw new UnsupportedOperationException("Not supported - " + layerId()); - } - - @Override - public void merge(Layer layer, int batchSize) { - throw new UnsupportedOperationException("Not supported - " + layerId()); - } - @Override - public INDArray activationMean() { - return activate(false); + public void clearNoiseWeightParams() { + //No op } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java index b261509ac71d..c52d43091392 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.layers; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -28,10 +27,11 @@ import org.deeplearning4j.nn.params.PretrainParamInitializer; import org.deeplearning4j.optimize.Solver; import org.deeplearning4j.optimize.api.ConvexOptimizer; -import org.deeplearning4j.util.Dropout; +import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; import java.lang.reflect.Constructor; import java.util.*; @@ -52,6 +52,8 @@ public abstract class BaseLayer weightNoiseParams = new HashMap<>(); + public BaseLayer(NeuralNetConfiguration conf) { super(conf); } @@ -65,29 +67,6 @@ public LayerConfT layerConf() { return (LayerConfT) 
this.conf.getLayer(); } - @Override - public Gradient error(INDArray errorSignal) { - INDArray W = getParam(DefaultParamInitializer.WEIGHT_KEY); - Gradient nextLayerGradient = new DefaultGradient(); - INDArray wErrorSignal = errorSignal.mmul(W.transpose()); - nextLayerGradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, wErrorSignal); - return nextLayerGradient; - } - - @Override - public Gradient calcGradient(Gradient layerError, INDArray activation) { - Gradient ret = new DefaultGradient(); - INDArray weightErrorSignal = layerError.getGradientFor(DefaultParamInitializer.WEIGHT_KEY); - INDArray weightError = weightErrorSignal.transpose().mmul(activation).transpose(); - ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightError); - if(hasBias()){ - INDArray biasGradient = weightError.mean(0); - ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradient); - } - - return ret; - } - @Override public Pair backpropGradient(INDArray epsilon) { //If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent) @@ -113,7 +92,11 @@ public Pair backpropGradient(INDArray epsilon) { ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad); } - INDArray epsilonNext = params.get(DefaultParamInitializer.WEIGHT_KEY).mmul(delta.transpose()).transpose(); + INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true); + + INDArray epsilonNext = W.mmul(delta.transpose()).transpose(); + + weightNoiseParams.clear(); return new Pair<>(ret, epsilonNext); } @@ -173,7 +156,6 @@ public Gradient gradient() { */ @Override public void iterate(INDArray input) { - setInput(input.dup()); applyDropOutIfNecessary(true); Gradient gradient = gradient(); for (String paramType : gradient.gradientForVariable().keySet()) { @@ -296,10 +278,44 @@ public Map paramTable(boolean backpropParamsOnly) { return params; } + /** + * Get the parameter, after applying any weight noise (such as DropConnect) if necessary. + * Note that during training, this will store the post-noise parameters, as these should be used + * for both forward pass and backprop, for a single iteration. 
+ * Consequently, the parameters (post noise) should be cleared after each training iteration + * + * @param param Parameter key + * @param training If true: during training + * @return The parameter, after applying any noise + */ + protected INDArray getParamWithNoise(String param, boolean training){ + INDArray p; + if(layerConf().getWeightNoise() != null){ + if(training && weightNoiseParams.size() > 0 && weightNoiseParams.containsKey(param) ){ + //Re-use these weights for both forward pass and backprop - don't want to use 2 different params here + //These should be cleared during backprop + return weightNoiseParams.get(param); + } else { + try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + p = layerConf().getWeightNoise().getParameter(this, param, getIterationCount(), getEpochCount(), training); + } + } + + if(training){ + //Store for re-use in backprop + weightNoiseParams.put(param, p); + } + } else { + return getParam(param); + } + + return p; + } + public INDArray preOutput(boolean training) { applyDropOutIfNecessary(training); - INDArray b = getParam(DefaultParamInitializer.BIAS_KEY); - INDArray W = getParam(DefaultParamInitializer.WEIGHT_KEY); + INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training); + INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training); //Input validation: if (input.rank() != 2 || input.columns() != W.rows()) { @@ -314,9 +330,6 @@ public INDArray preOutput(boolean training) { + W.size(0) + ") " + layerId()); } - if (conf.isUseDropConnect() && training && layerConf().getDropOut() > 0) { - W = Dropout.applyDropConnect(this, DefaultParamInitializer.WEIGHT_KEY); - } INDArray ret = input.mmul(W); if(hasBias()){ @@ -371,29 +384,6 @@ public double calcL1(boolean backpropParamsOnly) { return l1Sum; } - - @Override - public INDArray activationMean() { - INDArray b = getParam(DefaultParamInitializer.BIAS_KEY); - INDArray W = getParam(DefaultParamInitializer.WEIGHT_KEY); - INDArray ret = input().mmul(W); - if(hasBias()){ - ret.addiRowVector(b); - } - return ret; - } - - /** - * Averages the given logistic regression from a mini batch into this layer - * @param l the logistic regression layer to average into this layer - * @param batchSize the batch size - */ - @Override - public void merge(Layer l, int batchSize) { - setParams(params().addi(l.params().divi(batchSize))); - computeGradientAndScore(); - } - @Override public Layer clone() { Layer layer = null; @@ -429,7 +419,7 @@ public int numParams() { @Override public void fit(INDArray input) { if (input != null) { - setInput(input.dup()); + setInput(input); applyDropOutIfNecessary(true); } if (solver == null) { @@ -499,10 +489,14 @@ public void accumulateScore(double accum) { } @Override - public void applyLearningRateScoreDecay() { - for (Map.Entry lrPair : conf.getLearningRateByParam().entrySet()) - conf.setLearningRateByParam(lrPair.getKey(), - lrPair.getValue() * (conf.getLrPolicyDecayRate() + Nd4j.EPS_THRESHOLD)); + public void clear(){ + super.clear(); + weightNoiseParams.clear(); + } + + @Override + public void clearNoiseWeightParams(){ + weightNoiseParams.clear();; } /** diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java index eeb79e8ce2a3..11ce37469be6 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java +++ 
b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.layers; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.eval.Evaluation; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.api.Updater; @@ -33,6 +32,7 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.ILossFunction; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.util.FeatureUtil; import java.io.Serializable; @@ -148,7 +148,11 @@ public Pair backpropGradient(INDArray epsilon) { Pair pair = getGradientsAndDelta(preOutput2d(true)); //Returns Gradient and delta^(this), not Gradient and epsilon^(this-1) INDArray delta = pair.getSecond(); - INDArray epsilonNext = params.get(DefaultParamInitializer.WEIGHT_KEY).mmul(delta.transpose()).transpose(); + INDArray epsilonNext = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true).mmul(delta.transpose()).transpose(); + + //Normally we would clear weightNoiseParams here - but we want to reuse them for forward + backward + score + // So this is instead done in MultiLayerNetwork/CompGraph backprop methods + return new Pair<>(pair.getFirst(), epsilonNext); } @@ -333,7 +337,7 @@ public void fit(INDArray input, INDArray labels) { int updaterStateSize = 0; Map paramTable = paramTable(); for (Map.Entry entry : paramTable.entrySet()) { - updaterStateSize += (int) conf().getLayer().getIUpdaterByParam(entry.getKey()) + updaterStateSize += (int) conf().getLayer().getUpdaterByParam(entry.getKey()) .stateSize(entry.getValue().length()); } if (updaterStateSize > 0) @@ -369,10 +373,7 @@ public void fit(INDArray examples, int[] labels) { @Override public void clear() { super.clear(); - if (labels != null) { - labels.data().destroy(); - labels = null; - } + labels = null; solver = null; } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BasePretrainNetwork.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BasePretrainNetwork.java index 4cf35461cac0..cd0d92459470 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BasePretrainNetwork.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BasePretrainNetwork.java @@ -19,7 +19,6 @@ package org.deeplearning4j.nn.layers; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -29,6 +28,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.ILossFunction; +import org.nd4j.linalg.primitives.Pair; import java.util.*; @@ -212,6 +212,9 @@ public Pair backpropGradient(INDArray epsilon) { INDArray vBiasGradient = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY); result.getFirst().gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, vBiasGradient); vBiasGradient.assign(0); + + weightNoiseParams.clear(); + return result; } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java index 6dc8355da238..60744dfc8af2 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java @@ -1,11 +1,11 @@ package 
org.deeplearning4j.nn.layers; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** * Created by davekale on 12/7/16. @@ -55,7 +55,7 @@ public INDArray preOutput(boolean training) { if (input == null) { throw new IllegalArgumentException("Cannot perform forward pass with null input " + layerId()); } - applyDropOutIfNecessary(training); //Dups input if necessary + applyDropOutIfNecessary(training); if (maskArray != null) { input.muliColumnVector(maskArray); @@ -80,17 +80,6 @@ public boolean isPretrainLayer() { return false; } - - @Override - public Gradient calcGradient(Gradient layerError, INDArray indArray) { - throw new UnsupportedOperationException("Not supported " + layerId()); - } - - @Override - public void merge(Layer layer, int batchSize) { - throw new UnsupportedOperationException("Not supported - " + layerId()); - } - @Override public INDArray params() { return null; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java index 0dd97c73b9b9..c690f3d14094 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java @@ -1,7 +1,6 @@ package org.deeplearning4j.nn.layers; import lombok.extern.slf4j.Slf4j; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CacheMode; @@ -12,6 +11,7 @@ import org.deeplearning4j.optimize.api.IterationListener; import org.deeplearning4j.util.OneTimeLogger; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.util.Collection; import java.util.Map; @@ -75,42 +75,12 @@ public Type type() { return insideLayer.type(); } - @Override - public Gradient error(INDArray input) { - if (!logGradient) { - OneTimeLogger.info(log, - "Gradients for the frozen layer are not set and will therefore will not be updated.Warning will be issued only once per instance"); - logGradient = true; - } - return zeroGradient; - } - - @Override - public INDArray derivativeActivation(INDArray input) { - return insideLayer.derivativeActivation(input); - } - - @Override - public Gradient calcGradient(Gradient layerError, INDArray indArray) { - return zeroGradient; - } - //FIXME @Override public Pair backpropGradient(INDArray epsilon) { return new Pair<>(zeroGradient, null); } - @Override - public void merge(Layer layer, int batchSize) { - insideLayer.merge(layer, batchSize); - } - - @Override - public INDArray activationMean() { - return insideLayer.activationMean(); - } - @Override public INDArray preOutput(INDArray x) { return insideLayer.preOutput(x); @@ -281,11 +251,6 @@ public void setBackpropGradientsViewArray(INDArray gradients) { //no-op } - @Override - public void applyLearningRateScoreDecay() { - insideLayer.applyLearningRateScoreDecay(); - } - @Override public void fit(INDArray data) { if (!logFit) { @@ -409,6 +374,26 @@ public int getIndex() { return insideLayer.getIndex(); } + @Override + public int getIterationCount() { + return insideLayer.getIterationCount(); + } + + @Override + public int getEpochCount() { + return 
insideLayer.getEpochCount(); + } + + @Override + public void setIterationCount(int iterationCount) { + insideLayer.setIterationCount(iterationCount); + } + + @Override + public void setEpochCount(int epochCount) { + insideLayer.setEpochCount(epochCount); + } + @Override public void setInput(INDArray input) { insideLayer.setInput(input); @@ -439,6 +424,11 @@ public boolean isPretrainLayer() { return insideLayer.isPretrainLayer(); } + @Override + public void clearNoiseWeightParams() { + insideLayer.clearNoiseWeightParams(); + } + @Override public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java index 950a41246da6..d48a8347af89 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java @@ -19,7 +19,6 @@ package org.deeplearning4j.nn.layers; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.eval.Evaluation; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.layers.IOutputLayer; @@ -32,6 +31,7 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.ILossFunction; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.util.FeatureUtil; import java.io.Serializable; @@ -248,17 +248,6 @@ public boolean isPretrainLayer() { return false; } - - @Override - public Gradient calcGradient(Gradient layerError, INDArray indArray) { - throw new UnsupportedOperationException("Not supported " + layerId()); - } - - @Override - public void merge(Layer layer, int batchSize) { - throw new UnsupportedOperationException("Not supported " + layerId()); - } - @Override public INDArray params() { return null; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java index 1006c57a3609..192b02223a86 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java @@ -1,10 +1,10 @@ package org.deeplearning4j.nn.layers.convolution; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionHelper.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionHelper.java index 8003f4b1570f..39da18d08d2d 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionHelper.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionHelper.java @@ -17,7 +17,6 @@ */ package org.deeplearning4j.nn.layers.convolution; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer.AlgoMode; import 
org.deeplearning4j.nn.conf.layers.ConvolutionLayer.BwdDataAlgo; @@ -26,6 +25,7 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** * Helper for the convolution layer. diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java index 24bf0c8cc2d4..daf414d43151 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.layers.convolution; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.CacheMode; @@ -30,13 +29,13 @@ import org.deeplearning4j.nn.layers.BaseLayer; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; import org.deeplearning4j.util.ConvolutionUtils; -import org.deeplearning4j.util.Dropout; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.convolution.Convolution; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -120,7 +119,7 @@ public Type type() { @Override public Pair backpropGradient(INDArray epsilon) { - INDArray weights = getParam(ConvolutionParamInitializer.WEIGHT_KEY); + INDArray weights = getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true); int miniBatch = input.size(0); int inH = input.size(2); @@ -240,6 +239,8 @@ biasGradView, weightGradView, afn, layerConf().getCudnnAlgoMode(), } retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c'); + weightNoiseParams.clear(); + return new Pair<>(retGradient, epsNext); } @@ -267,11 +268,8 @@ public INDArray preOutput(boolean training) { * @return Pair of arrays: preOutput (activations) and optionally the im2col2d array */ protected Pair preOutput(boolean training, boolean forBackprop) { - INDArray weights = getParam(ConvolutionParamInitializer.WEIGHT_KEY); - INDArray bias = getParam(ConvolutionParamInitializer.BIAS_KEY); - if (conf.isUseDropConnect() && training && conf.getLayer().getDropOut() > 0) { - weights = Dropout.applyDropConnect(this, ConvolutionParamInitializer.WEIGHT_KEY); - } + INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training); + INDArray weights = getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training); //Input validation: expect rank 4 matrix if (input.rank() != 4) { @@ -451,20 +449,9 @@ public boolean isPretrainLayer() { return false; } - - @Override - public Gradient calcGradient(Gradient layerError, INDArray indArray) { - throw new UnsupportedOperationException("Not supported " + layerId()); - } - @Override public void fit(INDArray input) {} - @Override - public void merge(Layer layer, int batchSize) { - throw new UnsupportedOperationException(layerId()); - } - @Override public INDArray params() { //C order flattening, to match the gradient flattening order diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java 
b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java index 384f982bf5fc..6fa510e53d43 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java @@ -17,7 +17,6 @@ */ package org.deeplearning4j.nn.layers.convolution; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -27,6 +26,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; /** * Zero padding 1D layer for convolutional neural networks. @@ -53,6 +53,11 @@ public boolean isPretrainLayer() { return false; } + @Override + public void clearNoiseWeightParams() { + //No op + } + @Override public Type type() { return Type.RECURRENT; @@ -68,11 +73,6 @@ public Pair backpropGradient(INDArray epsilon) { return new Pair<>((Gradient) new DefaultGradient(), epsNext); } - @Override - public INDArray activationMean() { - throw new UnsupportedOperationException(); - } - @Override public INDArray activate(boolean training) { diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPaddingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPaddingLayer.java index d5aacb26ebec..8f51c9928b99 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPaddingLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPaddingLayer.java @@ -1,6 +1,5 @@ package org.deeplearning4j.nn.layers.convolution; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -10,6 +9,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; /** * Zero padding layer for convolutional neural networks. 
@@ -36,6 +36,11 @@ public boolean isPretrainLayer() { return false; } + @Override + public void clearNoiseWeightParams() { + //No op + } + @Override public Type type() { return Type.CONVOLUTIONAL; @@ -52,11 +57,6 @@ public Pair backpropGradient(INDArray epsilon) { return new Pair<>((Gradient) new DefaultGradient(), epsNext); } - @Override - public INDArray activationMean() { - throw new UnsupportedOperationException(); - } - @Override public INDArray activate(boolean training) { diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling1DLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling1DLayer.java index 9ef57f3a48c3..9e46de835934 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling1DLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling1DLayer.java @@ -1,10 +1,10 @@ package org.deeplearning4j.nn.layers.convolution.subsampling; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingHelper.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingHelper.java index 97de84f818f3..2611d66bcd9a 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingHelper.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingHelper.java @@ -17,11 +17,11 @@ */ package org.deeplearning4j.nn.layers.convolution.subsampling; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.layers.PoolingType; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** * Helper for the subsampling layer. 
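Note: the getParamWithNoise(...) contract introduced in the BaseLayer hunk further above (generate the noised copy of a parameter once per training iteration, reuse it for both the forward pass and backprop, then clear it) is the same contract that the new no-op clearNoiseWeightParams() overrides satisfy trivially for parameter-less layers. A self-contained sketch of that caching pattern in plain Java; none of these names are DL4J API, they only illustrate the contract:

import java.util.HashMap;
import java.util.Map;

public class NoisyParamCacheSketch {

    private final Map<String, double[]> params = new HashMap<>();
    private final Map<String, double[]> noiseCache = new HashMap<>();

    double[] getParamWithNoise(String key, boolean training) {
        if (!training) {
            // The real implementation delegates to the configured weight noise with training=false;
            // for illustration we simply return the clean parameters here.
            return params.get(key);
        }
        // Training: create the noised copy once, then reuse it for forward pass and backprop
        return noiseCache.computeIfAbsent(key, k -> applyNoise(params.get(k)));
    }

    void endIteration() {
        // Mirrors clearNoiseWeightParams(): noised copies must not leak into the next iteration
        noiseCache.clear();
    }

    private double[] applyNoise(double[] p) {
        double[] out = p.clone();
        for (int i = 0; i < out.length; i++) {
            // DropConnect-style masking, purely for illustration
            out[i] *= (Math.random() < 0.5 ? 0.0 : 2.0);
        }
        return out;
    }
}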
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java index 3570578fe3ab..58b063ee754b 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java @@ -19,7 +19,6 @@ package org.deeplearning4j.nn.layers.convolution.subsampling; import lombok.extern.slf4j.Slf4j; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.ConvolutionMode; @@ -29,7 +28,6 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; import org.deeplearning4j.util.ConvolutionUtils; -import org.deeplearning4j.util.Dropout; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.transforms.IsMax; import org.nd4j.linalg.api.ops.impl.transforms.convolution.Pooling2D; @@ -37,6 +35,7 @@ import org.nd4j.linalg.convolution.Convolution; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.util.ArrayUtil; import java.util.Arrays; @@ -246,8 +245,8 @@ public Pair backpropGradient(INDArray epsilon) { @Override public INDArray activate(boolean training) { - if (training && conf.getLayer().getDropOut() > 0) { - Dropout.applyDropout(input, conf.getLayer().getDropOut()); + if (training && !dropoutApplied && layerConf().getIDropout() != null) { + applyDropOutIfNecessary(true); } //Input validation: expect rank 4 matrix @@ -337,27 +336,6 @@ public INDArray activate(boolean training) { return output.reshape('c', miniBatch, inDepth, outH, outW); } - @Override - public Gradient error(INDArray input) { - throw new UnsupportedOperationException(layerId()); - } - - @Override - public Gradient calcGradient(Gradient layerError, INDArray indArray) { - throw new UnsupportedOperationException(layerId()); - } - - - @Override - public void merge(Layer layer, int batchSize) { - throw new UnsupportedOperationException(layerId()); - } - - @Override - public INDArray activationMean() { - return null; - } - @Override public Layer transpose() { throw new UnsupportedOperationException(layerId()); @@ -373,6 +351,11 @@ public boolean isPretrainLayer() { return false; } + @Override + public void clearNoiseWeightParams() { + //no op + } + @Override public void iterate(INDArray input) { throw new UnsupportedOperationException(layerId()); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java new file mode 100644 index 000000000000..3fd5cf6db62d --- /dev/null +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java @@ -0,0 +1,92 @@ +/*- + * + * * Copyright 2015 Skymind,Inc. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. 
+ * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package org.deeplearning4j.nn.layers.convolution.upsampling; + +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.BaseUpsamplingLayer; +import org.deeplearning4j.nn.gradient.Gradient; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; + + +/** + * 1D Upsampling layer. + *


+ * Used for upsampling a 1D convolution. Currently derived from 2D version. + * For forward and backward pass we add a dummy dimension, apply the 2D version + * and strip the extra dimension again. Eventually, we will want to migrate to a + * proper 1D version without this overhead. + * + * @author Max Pumperla + */ +@Slf4j +public class Upsampling1D extends Upsampling2D { + + + public Upsampling1D(NeuralNetConfiguration conf) { + super(conf); + } + + public Upsampling1D(NeuralNetConfiguration conf, INDArray input) { + super(conf, input); + } + + + @Override + public Pair backpropGradient(INDArray epsilon) { + + int size = ((BaseUpsamplingLayer) layerConf()).getSize(); + epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1); + // we replicate the error term times "size" so that backprop works properly on it + epsilon = epsilon.repeat(3, size); + + INDArray originalInput = input; + input = input.reshape(input.size(0), input.size(1), input.size(2), 1); + + Pair gradientEpsNext = super.backpropGradient(epsilon); + INDArray epsNext = gradientEpsNext.getSecond(); + Gradient gradient = gradientEpsNext.getFirst(); + + epsNext = epsNext.slice(0, 3); + input = originalInput; + + // Since we aggregate the gradient across "size" slices, we need to normalize afterwards. + return new Pair<>(gradient, epsNext.divi(size)); + } + + @Override + public INDArray preOutput(boolean training) { + return preOutput(training, false); + } + + public INDArray preOutput(boolean training, boolean forBackprop) { + INDArray originalInput = input; + input = input.reshape(input.size(0), input.size(1), input.size(2), 1); + + INDArray preOutput = super.preOutput(training, forBackprop); + + input = originalInput; + preOutput = preOutput.slice(0, 3); + + return preOutput; + } + + +} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java index 20a6223e5a25..aaeb91128326 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java @@ -23,11 +23,11 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.BaseUpsamplingLayer; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.AbstractLayer; -import org.deeplearning4j.util.Dropout; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; @@ -83,7 +83,7 @@ public Pair backpropGradient(INDArray epsilon) { int inH = input.size(2); int inW = input.size(3); - int size = layerConf().getSize(); + int size = ((BaseUpsamplingLayer) layerConf()).getSize(); INDArray outEpsilon = Nd4j.createUninitialized(miniBatch * inDepth * inH * inW); INDArray reshapedEpsilon = outEpsilon.reshape('c', miniBatch, inDepth, inH, inW); @@ -100,7 +100,6 @@ public Pair backpropGradient(INDArray epsilon) { .build(); Nd4j.getExecutioner().exec(op); - return new Pair<>(gradient, reshapedEpsilon); } @@ -110,10 +109,7 @@ public INDArray preOutput(boolean training) { } public INDArray preOutput(boolean training, boolean 
forBackprop) { - - if (training && conf.getLayer().getDropOut() > 0) { - Dropout.applyDropout(input, conf.getLayer().getDropOut()); - } + applyDropOutIfNecessary(training); if (input.rank() != 4) { throw new DL4JInvalidInputException("Got rank " + input.rank() @@ -131,7 +127,7 @@ public INDArray preOutput(boolean training, boolean forBackprop) { int inH = input.size(2); int inW = input.size(3); - int size = layerConf().getSize(); + int size = ((BaseUpsamplingLayer) layerConf()).getSize(); int outH = inH * size; int outW = inW * size; @@ -152,10 +148,7 @@ public INDArray preOutput(boolean training, boolean forBackprop) { @Override public INDArray activate(boolean training) { - - if (training && conf.getLayer().getDropOut() > 0) { - Dropout.applyDropout(input, conf.getLayer().getDropOut()); - } + applyDropOutIfNecessary(training); if (cacheMode == null) cacheMode = CacheMode.NONE; @@ -173,27 +166,6 @@ public INDArray activate(boolean training) { return z; } - @Override - public Gradient error(INDArray input) { - throw new UnsupportedOperationException(layerId()); - } - - @Override - public Gradient calcGradient(Gradient layerError, INDArray indArray) { - throw new UnsupportedOperationException(layerId()); - } - - - @Override - public void merge(Layer layer, int batchSize) { - throw new UnsupportedOperationException(layerId()); - } - - @Override - public INDArray activationMean() { - return null; - } - @Override public Layer transpose() { throw new UnsupportedOperationException(layerId()); @@ -209,6 +181,11 @@ public boolean isPretrainLayer() { return false; } + @Override + public void clearNoiseWeightParams() { + //No op + } + @Override public void iterate(INDArray input) { throw new UnsupportedOperationException(layerId()); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java index 164ff2c5e55b..a9d233412099 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java @@ -18,12 +18,11 @@ package org.deeplearning4j.nn.layers.feedforward.autoencoder; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.layers.BasePretrainNetwork; import org.deeplearning4j.nn.params.PretrainParamInitializer; -import org.deeplearning4j.util.Dropout; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** * Autoencoder. 
@@ -58,11 +57,8 @@ public Pair sampleVisibleGivenHidden(INDArray h) { // Encode public INDArray encode(INDArray v, boolean training) { - INDArray W = getParam(PretrainParamInitializer.WEIGHT_KEY); - if (training && conf.isUseDropConnect() && conf.getLayer().getDropOut() > 0) { - W = Dropout.applyDropConnect(this, PretrainParamInitializer.WEIGHT_KEY); - } - INDArray hBias = getParam(PretrainParamInitializer.BIAS_KEY); + INDArray W = getParamWithNoise(PretrainParamInitializer.WEIGHT_KEY, training); + INDArray hBias = getParamWithNoise(PretrainParamInitializer.BIAS_KEY, training); INDArray preAct = v.mmul(W).addiRowVector(hBias); //INDArray ret = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(conf.getLayer().getActivationFunction(), preAct)); @@ -73,8 +69,8 @@ public INDArray encode(INDArray v, boolean training) { // Decode public INDArray decode(INDArray y) { - INDArray W = getParam(PretrainParamInitializer.WEIGHT_KEY); - INDArray vBias = getParam(PretrainParamInitializer.VISIBLE_BIAS_KEY); + INDArray W = getParamWithNoise(PretrainParamInitializer.WEIGHT_KEY, true); + INDArray vBias = getParamWithNoise(PretrainParamInitializer.VISIBLE_BIAS_KEY, true); INDArray preAct = y.mmul(W.transposei()).addiRowVector(vBias); //return Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(conf.getLayer().getActivationFunction(), preAct)); return layerConf().getActivationFn().getActivation(preAct, true); @@ -110,7 +106,7 @@ public INDArray activate() { @Override public void computeGradientAndScore() { - INDArray W = getParam(PretrainParamInitializer.WEIGHT_KEY); + INDArray W = getParamWithNoise(PretrainParamInitializer.WEIGHT_KEY, true); double corruptionLevel = layerConf().getCorruptionLevel(); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayer.java index 174fc284d3fa..1248ae434d06 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayer.java @@ -29,6 +29,7 @@ import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; /**Embedding layer: feed-forward layer that expects single integers per example as input (class numbers, in range 0 to numClass-1) * as input. This input has shape [numExamples,1] instead of [numExamples,numClasses] for the equivalent one-hot representation. 
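Note: the EmbeddingLayer javadoc in the hunk above describes the expected input contract (one integer class index per example, shape [numExamples, 1], rather than a one-hot matrix). A small sketch of that contract; class count and embedding size are illustrative only:

import org.deeplearning4j.nn.conf.layers.EmbeddingLayer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class EmbeddingInputSketch {
    public static void main(String[] args) {
        // 4 examples, each a single class index in [0, numClasses-1]:
        // shape [numExamples, 1], not the [numExamples, numClasses] one-hot equivalent
        INDArray classIndices = Nd4j.create(new double[][] {{0}, {3}, {1}, {3}});

        // Each index is mapped to a learned dense vector of size nOut
        EmbeddingLayer embedding = new EmbeddingLayer.Builder()
                .nIn(4)     // number of classes
                .nOut(16)   // embedding dimension
                .build();

        System.out.println(java.util.Arrays.toString(classIndices.shape()));
    }
}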
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBM.java index 02ea87c4b33c..74c51a8d3f2d 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBM.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/rbm/RBM.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.layers.feedforward.rbm; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -27,11 +26,11 @@ import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.nn.params.PretrainParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.deeplearning4j.util.Dropout; import org.deeplearning4j.util.RBMUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.rng.distribution.Distribution; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import static org.nd4j.linalg.ops.transforms.Transforms.*; @@ -309,12 +308,9 @@ public Pair sampleVisibleGivenHidden(INDArray h) { } public INDArray preOutput(INDArray v, boolean training) { - INDArray hBias = getParam(PretrainParamInitializer.BIAS_KEY); - INDArray W = getParam(DefaultParamInitializer.WEIGHT_KEY); - if (training && conf.isUseDropConnect() && conf.getLayer().getDropOut() > 0) { - W = Dropout.applyDropConnect(this, DefaultParamInitializer.WEIGHT_KEY); - } - return v.mmul(W).addiRowVector(hBias); + INDArray weights = getParamWithNoise(PretrainParamInitializer.WEIGHT_KEY, training); + INDArray bias = getParamWithNoise(PretrainParamInitializer.BIAS_KEY, training); + return v.mmul(weights).addiRowVector(bias); } /** @@ -422,8 +418,8 @@ public INDArray propDown(INDArray h) { */ @Override public INDArray activate(boolean training) { - if (training && conf.getLayer().getDropOut() > 0.0) { - Dropout.applyDropout(input, conf.getLayer().getDropOut()); + if (training && conf.getLayer().getIDropout() != null) { + applyDropOutIfNecessary(training); } //reconstructed: propUp ----> hidden propDown to transform INDArray propUp = propUp(input, training); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java index 6ff23b94f388..9725c2ea0826 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java @@ -1,7 +1,6 @@ package org.deeplearning4j.nn.layers.normalization; import lombok.extern.slf4j.Slf4j; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -18,6 +17,7 @@ import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; import java.util.ArrayList; import java.util.Arrays; @@ -80,16 +80,6 @@ public Type type() { return Type.NORMALIZATION; } - @Override - public Gradient error(INDArray input) { - return null; - } - - @Override - public Gradient calcGradient(Gradient layerError, INDArray indArray) { - return null; - 
} - @Override public Pair backpropGradient(INDArray epsilon) { INDArray nextEpsilon; @@ -219,11 +209,6 @@ public Pair backpropGradient(INDArray epsilon) { return new Pair<>(retGradient, nextEpsilon); } - @Override - public void merge(Layer layer, int batchSize) { - throw new UnsupportedOperationException(layerId()); - } - @Override public void fit(INDArray data) {} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationHelper.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationHelper.java index d388da5c50fe..b97e60be3662 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationHelper.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationHelper.java @@ -17,9 +17,9 @@ */ package org.deeplearning4j.nn.layers.normalization; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** * Helper for the batch normalization layer. diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java index 59640b2fd5df..cb1636690e00 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java @@ -1,6 +1,5 @@ package org.deeplearning4j.nn.layers.normalization; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -11,6 +10,7 @@ import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -188,20 +188,9 @@ public boolean isPretrainLayer() { return false; } - - @Override - public Gradient calcGradient(Gradient layerError, INDArray indArray) { - throw new UnsupportedOperationException("Not supported - " + layerId()); - } - - @Override - public void merge(Layer layer, int batchSize) { - throw new UnsupportedOperationException(layerId()); - } - @Override - public INDArray activationMean() { - return activate(false); + public void clearNoiseWeightParams() { + //No op } @Override diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalizationHelper.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalizationHelper.java index 0b6cc49378ab..c2bf9987e544 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalizationHelper.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalizationHelper.java @@ -17,9 +17,9 @@ */ package org.deeplearning4j.nn.layers.normalization; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; /** * Helper for the local response normalization layer. 
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java index da06606068c6..a820ee54e0a5 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java @@ -16,7 +16,6 @@ import org.nd4j.linalg.activations.impl.ActivationSoftmax; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.LossFunction; import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp; import org.nd4j.linalg.api.ops.impl.transforms.IsMax; import org.nd4j.linalg.api.ops.impl.transforms.Not; @@ -325,11 +324,6 @@ private INDArray computeBackpropGradientAndScore(){ return epsOut; } - @Override - public INDArray activationMean() { - return activate(); - } - @Override public INDArray activate(boolean training) { //Essentially: just apply activation functions... @@ -595,6 +589,11 @@ public boolean isPretrainLayer() { return false; } + @Override + public void clearNoiseWeightParams() { + //No op + } + /** * Given the network output and a detection threshold (in range 0 to 1) determine the objects detected by * the network.
diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java index b1fecd76875a..c5edb84abc5f 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java @@ -1,7 +1,6 @@ package org.deeplearning4j.nn.layers.pooling; import org.apache.commons.lang3.ArrayUtils; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -16,6 +15,7 @@ import org.nd4j.linalg.api.ops.impl.transforms.IsMax; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; @@ -72,6 +72,11 @@ public boolean isPretrainLayer() { return false; } + @Override + public void clearNoiseWeightParams() { + //No op + } + @Override public double calcL2(boolean backpropParamsOnly) { return 0; @@ -272,11 +277,6 @@ public Pair backpropGradient(INDArray epsilon) { return new Pair<>(retGradient, epsilonNd); } - @Override - public INDArray activationMean() { - throw new UnsupportedOperationException("Not supported"); - } - private INDArray epsilonHelperFullArray(INDArray inputArray, INDArray epsilon, int[] poolDim) { //Broadcast: occurs on the remaining dimensions, after the pool dimensions have been removed. diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java index 64ef154d7760..9208b24aaced 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java @@ -19,7 +19,6 @@ package org.deeplearning4j.nn.layers.recurrent; import lombok.extern.slf4j.Slf4j; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CacheMode; @@ -28,6 +27,7 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.GravesBidirectionalLSTMParamInitializer; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.util.Map; @@ -71,11 +71,6 @@ public Gradient gradient() { throw new UnsupportedOperationException("Not supported " + layerId()); } - @Override - public Gradient calcGradient(Gradient layerError, INDArray activation) { - throw new UnsupportedOperationException("Not supported " + layerId()); - } - @Override public Pair backpropGradient(INDArray epsilon) { return backpropGradientHelper(epsilon, false, -1); @@ -265,11 +260,6 @@ private FwdPassReturn activateHelperDirectional(final boolean training, final IN } } - @Override - public INDArray activationMean() { - return activate(); - } - @Override public Type type() { return Type.RECURRENT; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java index 25f6222aca97..7778addcb749 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java +++ 
b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java @@ -19,7 +19,6 @@ package org.deeplearning4j.nn.layers.recurrent; import lombok.extern.slf4j.Slf4j; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CacheMode; @@ -28,6 +27,7 @@ import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.params.GravesLSTMParamInitializer; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.util.Map; @@ -64,11 +64,6 @@ public Gradient gradient() { + layerId()); } - @Override - public Gradient calcGradient(Gradient layerError, INDArray activation) { - throw new UnsupportedOperationException("Not supported " + layerId()); - } - @Override public Pair backpropGradient(INDArray epsilon) { return backpropGradientHelper(epsilon, false, -1); @@ -83,8 +78,8 @@ public Pair tbpttBackpropGradient(INDArray epsilon, int tbpt private Pair backpropGradientHelper(final INDArray epsilon, final boolean truncatedBPTT, final int tbpttBackwardLength) { - final INDArray inputWeights = getParam(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY); - final INDArray recurrentWeights = getParam(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG] + final INDArray inputWeights = getParamWithNoise(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY, true); + final INDArray recurrentWeights = getParamWithNoise(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY, true); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG] //First: Do forward pass to get gate activations, zs etc. FwdPassReturn fwdPass; @@ -99,10 +94,14 @@ private Pair backpropGradientHelper(final INDArray epsilon, } - return LSTMHelpers.backpropGradientHelper(this.conf, this.layerConf().getGateActivationFn(), this.input, + Pair p = LSTMHelpers.backpropGradientHelper(this.conf, this.layerConf().getGateActivationFn(), this.input, recurrentWeights, inputWeights, epsilon, truncatedBPTT, tbpttBackwardLength, fwdPass, true, GravesLSTMParamInitializer.INPUT_WEIGHT_KEY, GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY, GravesLSTMParamInitializer.BIAS_KEY, gradientViews, maskArray, true, null); + + weightNoiseParams.clear(); + + return p; } @@ -152,9 +151,9 @@ private FwdPassReturn activateHelper(final boolean training, final INDArray prev return ret; } - final INDArray recurrentWeights = getParam(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG] - final INDArray inputWeights = getParam(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY); //Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg] - final INDArray biases = getParam(GravesLSTMParamInitializer.BIAS_KEY); //by row: IFOG //Shape: [4,hiddenLayerSize]; order: [bi,bf,bo,bg]^T + final INDArray recurrentWeights = getParamWithNoise(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY, training); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG] + final INDArray inputWeights = getParamWithNoise(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY, training); //Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg] + final INDArray biases = getParamWithNoise(GravesLSTMParamInitializer.BIAS_KEY, training); //by row: IFOG //Shape: [4,hiddenLayerSize]; order: [bi,bf,bo,bg]^T FwdPassReturn fwd = LSTMHelpers.activateHelper(this, this.conf, 
this.layerConf().getGateActivationFn(), this.input, recurrentWeights, inputWeights, biases, training, prevOutputActivations, @@ -170,11 +169,6 @@ private FwdPassReturn activateHelper(final boolean training, final INDArray prev return fwd; } - @Override - public INDArray activationMean() { - return activate(); - } - @Override public Type type() { return Type.RECURRENT; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTM.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTM.java index 4f8817652096..548073cce377 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTM.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTM.java @@ -19,7 +19,6 @@ package org.deeplearning4j.nn.layers.recurrent; import lombok.extern.slf4j.Slf4j; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CacheMode; @@ -28,6 +27,7 @@ import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.params.LSTMParamInitializer; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.util.Map; @@ -80,11 +80,6 @@ public Gradient gradient() { + layerId()); } - @Override - public Gradient calcGradient(Gradient layerError, INDArray activation) { - throw new UnsupportedOperationException("Not supported " + layerId()); - } - @Override public Pair backpropGradient(INDArray epsilon) { return backpropGradientHelper(epsilon, false, -1); @@ -99,8 +94,8 @@ public Pair tbpttBackpropGradient(INDArray epsilon, int tbpt private Pair backpropGradientHelper(final INDArray epsilon, final boolean truncatedBPTT, final int tbpttBackwardLength) { - final INDArray inputWeights = getParam(LSTMParamInitializer.INPUT_WEIGHT_KEY); - final INDArray recurrentWeights = getParam(LSTMParamInitializer.RECURRENT_WEIGHT_KEY); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG] + final INDArray inputWeights = getParamWithNoise(LSTMParamInitializer.INPUT_WEIGHT_KEY, true); + final INDArray recurrentWeights = getParamWithNoise(LSTMParamInitializer.RECURRENT_WEIGHT_KEY, true); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG] //First: Do forward pass to get gate activations, zs etc. 
FwdPassReturn fwdPass; @@ -115,10 +110,13 @@ private Pair backpropGradientHelper(final INDArray epsilon, } - return LSTMHelpers.backpropGradientHelper(this.conf, this.layerConf().getGateActivationFn(), this.input, + Pair p = LSTMHelpers.backpropGradientHelper(this.conf, this.layerConf().getGateActivationFn(), this.input, recurrentWeights, inputWeights, epsilon, truncatedBPTT, tbpttBackwardLength, fwdPass, true, LSTMParamInitializer.INPUT_WEIGHT_KEY, LSTMParamInitializer.RECURRENT_WEIGHT_KEY, LSTMParamInitializer.BIAS_KEY, gradientViews, null, false, helper); + + weightNoiseParams.clear(); + return p; } @@ -168,9 +166,9 @@ private FwdPassReturn activateHelper(final boolean training, final INDArray prev return ret; } - final INDArray recurrentWeights = getParam(LSTMParamInitializer.RECURRENT_WEIGHT_KEY); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG] - final INDArray inputWeights = getParam(LSTMParamInitializer.INPUT_WEIGHT_KEY); //Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg] - final INDArray biases = getParam(LSTMParamInitializer.BIAS_KEY); //by row: IFOG //Shape: [4,hiddenLayerSize]; order: [bi,bf,bo,bg]^T + final INDArray recurrentWeights = getParamWithNoise(LSTMParamInitializer.RECURRENT_WEIGHT_KEY, training); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG] + final INDArray inputWeights = getParamWithNoise(LSTMParamInitializer.INPUT_WEIGHT_KEY, training); //Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg] + final INDArray biases = getParamWithNoise(LSTMParamInitializer.BIAS_KEY, training); //by row: IFOG //Shape: [4,hiddenLayerSize]; order: [bi,bf,bo,bg]^T FwdPassReturn fwd = LSTMHelpers.activateHelper(this, this.conf, this.layerConf().getGateActivationFn(), this.input, recurrentWeights, inputWeights, biases, training, prevOutputActivations, @@ -185,11 +183,6 @@ private FwdPassReturn activateHelper(final boolean training, final INDArray prev return fwd; } - @Override - public INDArray activationMean() { - return activate(); - } - @Override public Type type() { return Type.RECURRENT; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelper.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelper.java index 1ec1f78a9bda..3dc95acf1967 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelper.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelper.java @@ -17,12 +17,12 @@ */ package org.deeplearning4j.nn.layers.recurrent; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.util.Map; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java index c96b86ab8fbe..eaaf533eea39 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java @@ -1,7 +1,6 @@ package org.deeplearning4j.nn.layers.recurrent; import lombok.extern.slf4j.Slf4j; -import org.nd4j.linalg.primitives.Pair; import 
org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -14,7 +13,6 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.BaseLayer; -import org.deeplearning4j.util.Dropout; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationSigmoid; import org.nd4j.linalg.api.blas.Level1; @@ -25,6 +23,7 @@ import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; import java.util.HashMap; @@ -99,14 +98,7 @@ static public FwdPassReturn activateHelper(final BaseLayer layer, final NeuralNe } - INDArray recurrentWeightsIFOG = recurrentWeights - .get(NDArrayIndex.all(), NDArrayIndex.interval(0, 4 * hiddenLayerSize)).dup('f'); - - - //Apply dropconnect to input (not recurrent) weights only: - if (conf.isUseDropConnect() && training && conf.getLayer().getDropOut() > 0) { - inputWeights = Dropout.applyDropConnect(layer, inputWeightKey); - } + INDArray recurrentWeightsIFOG = recurrentWeights.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 4 * hiddenLayerSize)).dup('f'); INDArray wFFTranspose = null; INDArray wOOTranspose = null; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java index 78e02467e0f0..dc736521f5c9 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java @@ -17,20 +17,19 @@ */ package org.deeplearning4j.nn.layers.recurrent; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseOutputLayer; import org.deeplearning4j.nn.params.DefaultParamInitializer; -import org.deeplearning4j.util.Dropout; import org.deeplearning4j.util.TimeSeriesUtils; import org.nd4j.linalg.activations.impl.ActivationSoftmax; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.transforms.SoftMax; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.ILossFunction; +import org.nd4j.linalg.primitives.Pair; import java.util.Arrays; @@ -64,6 +63,9 @@ public Pair backpropGradient(INDArray epsilon) { this.input = inputTemp; INDArray epsilon2d = gradAndEpsilonNext.getSecond(); INDArray epsilon3d = TimeSeriesUtils.reshape2dTo3d(epsilon2d, input.size(0)); + + weightNoiseParams.clear(); + return new Pair<>(gradAndEpsilonNext.getFirst(), epsilon3d); } @@ -142,8 +144,7 @@ public INDArray output(boolean training) { return TimeSeriesUtils.reshape2dTo3d(out2d, input.size(0)); } - if (training) - applyDropOutIfNecessary(training); + applyDropOutIfNecessary(training); INDArray origInput = input; this.input = TimeSeriesUtils.reshape3dTo2d(input); INDArray out = super.activate(true); @@ -159,11 +160,8 @@ public INDArray activate(boolean training) { if (input.rank() != 3) throw new UnsupportedOperationException( "Input must be rank 3. 
Got input with rank " + input.rank() + " " + layerId()); - INDArray b = getParam(DefaultParamInitializer.BIAS_KEY); - INDArray W = getParam(DefaultParamInitializer.WEIGHT_KEY); - if (conf.isUseDropConnect() && training) { - W = Dropout.applyDropConnect(this, DefaultParamInitializer.WEIGHT_KEY); - } + INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training); + INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training); INDArray input2d = TimeSeriesUtils.reshape3dTo2d(input); diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java index 4699c3510dd0..a2e7f13ee371 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.layers.training; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -28,6 +27,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.ILossFunction; +import org.nd4j.linalg.primitives.Pair; /** @@ -168,9 +168,14 @@ public Pair backpropGradient(INDArray epsilon) { INDArray centersForExamples = labels.mmul(centers); INDArray dLcdai = input.sub(centersForExamples); - INDArray epsilonNext = params.get(CenterLossParamInitializer.WEIGHT_KEY).mmul(delta.transpose()).transpose(); + INDArray w = getParamWithNoise(CenterLossParamInitializer.WEIGHT_KEY, true); + + INDArray epsilonNext = w.mmul(delta.transpose()).transpose(); double lambda = layerConf().getLambda(); epsilonNext.addi(dLcdai.muli(lambda)); // add center loss here + + weightNoiseParams.clear(); + return new Pair<>(pair.getFirst(), epsilonNext); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java index 08350bebd177..186f7b5096b9 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java @@ -3,10 +3,10 @@ import lombok.AllArgsConstructor; import lombok.Data; import lombok.Getter; -import org.deeplearning4j.nn.api.layers.LayerConstraint; -import org.nd4j.linalg.primitives.Pair; +import lombok.Setter; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.variational.CompositeReconstructionDistribution; @@ -27,6 +27,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.ops.transforms.Transforms; +import org.nd4j.linalg.primitives.Pair; import java.util.*; @@ -74,6 +75,13 @@ public class VariationalAutoencoder implements Layer { protected boolean zeroedPretrainParamGradients = false; + protected Map weightNoiseParams = new HashMap<>(); + + @Getter @Setter + protected int iterationCount; + @Getter 
@Setter + protected int epochCount; + public VariationalAutoencoder(NeuralNetConfiguration conf) { this.conf = conf; @@ -132,6 +140,30 @@ public double score() { return score; } + protected INDArray getParamWithNoise(String param, boolean training){ + INDArray p; + if(layerConf().getWeightNoise() != null){ + if(training && weightNoiseParams.size() > 0 ){ + //Re-use these weights for both forward pass and backprop - don't want to use 2 different params here + //These should be cleared during backprop + return weightNoiseParams.get(param); + } else { + try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + p = layerConf().getWeightNoise().getParameter(this, param, getIterationCount(), getEpochCount(), training); + } + } + + if(training){ + //Store for re-use in backprop + weightNoiseParams.put(param, p); + } + } else { + return getParam(param); + } + + return p; + } + @Override public void computeGradientAndScore() { //Forward pass through the encoder and mean for P(Z|X) @@ -139,8 +171,8 @@ public void computeGradientAndScore() { IActivation afn = layerConf().getActivationFn(); //Forward pass through logStd^2 for P(Z|X) - INDArray pzxLogStd2W = params.get(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_W); - INDArray pzxLogStd2b = params.get(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_B); + INDArray pzxLogStd2W = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_W, true); + INDArray pzxLogStd2b = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_B, true); INDArray pzxLogStd2Pre = fwd.encoderActivations[fwd.encoderActivations.length - 1].mmul(pzxLogStd2W) .addiRowVector(pzxLogStd2b); @@ -178,8 +210,8 @@ public void computeGradientAndScore() { String wKey = "d" + i + WEIGHT_KEY_SUFFIX; String bKey = "d" + i + BIAS_KEY_SUFFIX; - INDArray weights = params.get(wKey); - INDArray bias = params.get(bKey); + INDArray weights = getParamWithNoise(wKey, true); + INDArray bias = getParamWithNoise(bKey, true); current = current.mmul(weights).addiRowVector(bias); decoderPreOut[i] = current.dup(); @@ -187,8 +219,8 @@ public void computeGradientAndScore() { decoderActivations[i] = current; } - INDArray pxzw = params.get(VariationalAutoencoderParamInitializer.PXZ_W); - INDArray pxzb = params.get(VariationalAutoencoderParamInitializer.PXZ_B); + INDArray pxzw = getParamWithNoise(VariationalAutoencoderParamInitializer.PXZ_W, true); + INDArray pxzb = getParamWithNoise(VariationalAutoencoderParamInitializer.PXZ_B, true); if (l == 0) { //Need to add other component of score, in addition to negative log probability @@ -258,7 +290,7 @@ public void computeGradientAndScore() { INDArray currentDelta = afn.backprop(decoderPreOut[i], epsilon).getFirst(); //TODO activation functions with params - INDArray weights = params.get(wKey); + INDArray weights = getParamWithNoise(wKey, true); INDArray dLdW = gradientViews.get(wKey); INDArray dLdB = gradientViews.get(bKey); @@ -287,8 +319,8 @@ public void computeGradientAndScore() { } //Do backprop through p(z|x) - INDArray eZXMeanW = params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W); - INDArray eZXLogStdev2W = params.get(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_W); + INDArray eZXMeanW = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, true); + INDArray eZXLogStdev2W = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_W, true); INDArray dLdz = epsilon; //If we were maximizing the equation in Kinga and Welling, this would be a .sub(meanZ). 
Here: we are minimizing the negative instead @@ -344,7 +376,7 @@ public void computeGradientAndScore() { String wKey = "e" + i + WEIGHT_KEY_SUFFIX; String bKey = "e" + i + BIAS_KEY_SUFFIX; - INDArray weights = params.get(wKey); + INDArray weights = getParamWithNoise(wKey, true); INDArray dLdW = gradientViews.get(wKey); INDArray dLdB = gradientViews.get(bKey); @@ -418,6 +450,8 @@ public void computeGradientAndScore() { g.put(VariationalAutoencoderParamInitializer.PXZ_B, gradientMap.get(VariationalAutoencoderParamInitializer.PXZ_B)); + weightNoiseParams.clear(); + this.gradient = gradient; } @@ -481,11 +515,6 @@ public void setBackpropGradientsViewArray(INDArray gradients) { this.gradientViews = conf.getLayer().initializer().getGradientsFromFlattened(conf, gradients); } - @Override - public void applyLearningRateScoreDecay() { - - } - @Override public void fit(INDArray data) { this.setInput(data); @@ -631,21 +660,6 @@ public Type type() { return Type.FEED_FORWARD; } - @Override - public Gradient error(INDArray input) { - throw new UnsupportedOperationException("Not supported " + layerId()); - } - - @Override - public INDArray derivativeActivation(INDArray input) { - throw new UnsupportedOperationException("Not supported " + layerId()); - } - - @Override - public Gradient calcGradient(Gradient layerError, INDArray indArray) { - throw new UnsupportedOperationException("Not supported " + layerId()); - } - @Override public Pair backpropGradient(INDArray epsilon) { if (!zeroedPretrainParamGradients) { @@ -663,7 +677,7 @@ public Pair backpropGradient(INDArray epsilon) { INDArray currentDelta = pzxActivationFn.backprop(fwd.pzxMeanPreOut, epsilon).getFirst(); //Finally, calculate mean value: - INDArray meanW = params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W); + INDArray meanW = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, true); INDArray dLdMeanW = gradientViews.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W); //f order INDArray lastEncoderActivation = fwd.encoderActivations[fwd.encoderActivations.length - 1]; Nd4j.gemm(lastEncoderActivation, currentDelta, dLdMeanW, true, false, 1.0, 0.0); @@ -682,7 +696,7 @@ public Pair backpropGradient(INDArray epsilon) { String wKey = "e" + i + WEIGHT_KEY_SUFFIX; String bKey = "e" + i + BIAS_KEY_SUFFIX; - INDArray weights = params.get(wKey); + INDArray weights = getParamWithNoise(wKey, true); INDArray dLdW = gradientViews.get(wKey); INDArray dLdB = gradientViews.get(bKey); @@ -709,16 +723,6 @@ public Pair backpropGradient(INDArray epsilon) { return new Pair<>(gradient, epsilon); } - @Override - public void merge(Layer layer, int batchSize) { - throw new UnsupportedOperationException("Not supported " + layerId()); - } - - @Override - public INDArray activationMean() { - throw new UnsupportedOperationException("Not supported " + layerId()); - } - @Override public INDArray preOutput(INDArray x) { return preOutput(x, TrainingMode.TEST); @@ -765,8 +769,8 @@ private VAEFwdHelper doForward(boolean training, boolean forBackprop) { String wKey = "e" + i + WEIGHT_KEY_SUFFIX; String bKey = "e" + i + BIAS_KEY_SUFFIX; - INDArray weights = params.get(wKey); - INDArray bias = params.get(bKey); + INDArray weights = getParamWithNoise(wKey, training); + INDArray bias = getParamWithNoise(bKey, training); current = current.mmul(weights).addiRowVector(bias); if (forBackprop) { @@ -777,8 +781,8 @@ private VAEFwdHelper doForward(boolean training, boolean forBackprop) { } //Finally, calculate mean value: - INDArray mW = 
params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W); - INDArray mB = params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_B); + INDArray mW = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, training); + INDArray mB = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_B, training); INDArray pzxMean = current.mmul(mW).addiRowVector(mB); @@ -921,6 +925,11 @@ public boolean isPretrainLayer() { return true; } + @Override + public void clearNoiseWeightParams() { + weightNoiseParams.clear(); + } + @Override public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { @@ -996,8 +1005,8 @@ public INDArray reconstructionLogProbability(INDArray data, int numSamples) { IActivation afn = layerConf().getActivationFn(); //Forward pass through logStd^2 for P(Z|X) - INDArray pzxLogStd2W = params.get(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_W); - INDArray pzxLogStd2b = params.get(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_B); + INDArray pzxLogStd2W = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_W, false); + INDArray pzxLogStd2b = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_B, false); INDArray meanZ = fwd.pzxMeanPreOut; INDArray logStdev2Z = fwd.encoderActivations[fwd.encoderActivations.length - 1].mmul(pzxLogStd2W) @@ -1011,8 +1020,8 @@ public INDArray reconstructionLogProbability(INDArray data, int numSamples) { int minibatch = input.size(0); int size = fwd.pzxMeanPreOut.size(1); - INDArray pxzw = params.get(VariationalAutoencoderParamInitializer.PXZ_W); - INDArray pxzb = params.get(VariationalAutoencoderParamInitializer.PXZ_B); + INDArray pxzw = getParamWithNoise(VariationalAutoencoderParamInitializer.PXZ_W, false); + INDArray pxzb = getParamWithNoise(VariationalAutoencoderParamInitializer.PXZ_B, false); INDArray[] decoderWeights = new INDArray[decoderLayerSizes.length]; INDArray[] decoderBiases = new INDArray[decoderLayerSizes.length]; @@ -1020,8 +1029,8 @@ public INDArray reconstructionLogProbability(INDArray data, int numSamples) { for (int i = 0; i < decoderLayerSizes.length; i++) { String wKey = "d" + i + WEIGHT_KEY_SUFFIX; String bKey = "d" + i + BIAS_KEY_SUFFIX; - decoderWeights[i] = params.get(wKey); - decoderBiases[i] = params.get(bKey); + decoderWeights[i] = getParamWithNoise(wKey, false); + decoderBiases[i] = getParamWithNoise(bKey, false); } INDArray sumReconstructionNegLogProbability = null; @@ -1079,9 +1088,9 @@ public INDArray generateRandomGivenZ(INDArray latentSpaceValues) { } private INDArray decodeGivenLatentSpaceValues(INDArray latentSpaceValues) { - if (latentSpaceValues.size(1) != params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W).size(1)) { + if (latentSpaceValues.size(1) != getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, true).size(1)) { throw new IllegalArgumentException("Invalid latent space values: expected size " - + params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W).size(1) + + getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, false).size(1) + ", got size (dimension 1) = " + latentSpaceValues.size(1) + " " + layerId()); } @@ -1094,14 +1103,14 @@ private INDArray decodeGivenLatentSpaceValues(INDArray latentSpaceValues) { for (int i = 0; i < nDecoderLayers; i++) { String wKey = "d" + i + WEIGHT_KEY_SUFFIX; String bKey = "d" + i + BIAS_KEY_SUFFIX; - INDArray w = params.get(wKey); - INDArray b = params.get(bKey); + INDArray w = getParamWithNoise(wKey, false); + 
INDArray b = getParamWithNoise(bKey, false); currentActivations = currentActivations.mmul(w).addiRowVector(b); afn.getActivation(currentActivations, false); } - INDArray pxzw = params.get(VariationalAutoencoderParamInitializer.PXZ_W); - INDArray pxzb = params.get(VariationalAutoencoderParamInitializer.PXZ_B); + INDArray pxzw = getParamWithNoise(VariationalAutoencoderParamInitializer.PXZ_W, false); + INDArray pxzb = getParamWithNoise(VariationalAutoencoderParamInitializer.PXZ_B, false); return currentActivations.mmul(pxzw).addiRowVector(pxzb); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java index ee6f840ac4b4..4581f000c4a0 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java @@ -21,10 +21,9 @@ import lombok.Getter; import lombok.Setter; +import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.datasets.iterator.AsyncDataSetIterator; import org.deeplearning4j.datasets.iterator.MultiDataSetWrapperIterator; import org.deeplearning4j.eval.*; @@ -35,6 +34,7 @@ import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.api.layers.RecurrentLayer; import org.deeplearning4j.nn.conf.*; +import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -68,10 +68,9 @@ import org.nd4j.linalg.heartbeat.utils.TaskUtils; import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.memory.abstracts.DummyWorkspace; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.primitives.Triple; import org.nd4j.linalg.util.FeatureUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.Serializable; import java.util.*; @@ -87,8 +86,8 @@ * * @author Adam Gibson */ +@Slf4j public class MultiLayerNetwork implements Serializable, Classifier, Layer, NeuralNetwork { - private static final Logger log = LoggerFactory.getLogger(MultiLayerNetwork.class); //the hidden neural network layers (including output layer) protected Layer[] layers; @@ -644,6 +643,8 @@ public void init(INDArray parameters, boolean cloneParametersArray) { solver.initOptimizer(); } } + + synchronizeIterEpochCounts(); } /** @@ -735,18 +736,6 @@ public INDArray activate(int layer, INDArray input) { return getLayer(layer).activate(input); } - @Override - public INDArray activationMean() { - //TODO determine how to pass back all activationMean for MLN - throw new UnsupportedOperationException(); - // List avgActivations = new ArrayList<>(); - // - // for( Layer layer: getLayers() ){ - // avgActivations.add(layer.activationMean()); - // } - // return Nd4j.toFlattened(avgActivations); - } - /** * Sets the input and labels from this dataset * @@ -1502,6 +1491,7 @@ public void updateRnnStateWithTBPTTState() { /** Equivalent to backprop(), but calculates gradient for truncated BPTT instead. 
*/ protected void truncatedBPTTGradient() { + synchronizeIterEpochCounts(); if (flattenedGradients == null) { initGradientsView(); } @@ -2224,6 +2214,8 @@ public void computeGradientAndScore() { } truncatedBPTTGradient(); } else { + synchronizeIterEpochCounts(); + //First: do a feed-forward through the network //Note that we don't actually need to do the full forward pass through the output layer right now; but we do // need the input to the output layer to be set (such that backprop can be done) @@ -2258,6 +2250,9 @@ public void computeGradientAndScore() { } } } + + //Clear the post noise/dropconnect parameters on the output layer + getOutputLayer().clearNoiseWeightParams(); } @Override @@ -2284,42 +2279,6 @@ public void applyConstraints(int iteration, int epoch) { } } - /** - * Averages the given logistic regression - * from a mini batch in to this one - * - * @param layer the logistic regression to average in to this one - * @param batchSize the batch size - * @deprecated Not supported and not used - */ - @Override - @Deprecated - public void merge(Layer layer, int batchSize) { - throw new UnsupportedOperationException(); - } - - /** - * Deprecated: Merges this network with the other one. - * - * @param network the network to merge with - * @param batchSize the batch size (number of training examples) - * to average by - * @deprecated As of 0.7.3 - Feb 2017. No longer used; parameter averaging is performed via alternative means/methods - */ - @Deprecated - public void merge(MultiLayerNetwork network, int batchSize) { - if (network.layers.length != layers.length) - throw new IllegalArgumentException("Unable to merge networks that are not of equal length"); - for (int i = 0; i < getnLayers(); i++) { - Layer n = layers[i]; - Layer otherNetwork = network.layers[i]; - n.merge(otherNetwork, batchSize); - - } - - getOutputLayer().merge(network.getOutputLayer(), batchSize); - } - /** * Note that if input isn't null @@ -2364,18 +2323,6 @@ public void setParameters(INDArray params) { setParams(params); } - @Override - public void applyLearningRateScoreDecay() { - for (Layer layer : layers) { - if (!layer.conf().getLearningRateByParam().isEmpty()) { - for (Map.Entry lrPair : layer.conf().getLearningRateByParam().entrySet()) { - layer.conf().setLearningRateByParam(lrPair.getKey(), - lrPair.getValue() * (layer.conf().getLrPolicyDecayRate() + Nd4j.EPS_THRESHOLD)); - } - } - } - } - public NeuralNetConfiguration getDefaultConfiguration() { return defaultConfiguration; } @@ -2447,6 +2394,13 @@ public boolean isPretrainLayer() { return false; } + @Override + public void clearNoiseWeightParams() { + for(Layer l : layers){ + l.clearNoiseWeightParams(); + } + } + @Override public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { @@ -2489,26 +2443,11 @@ public Pair feedForwardMaskArray(INDArray maskArray, MaskSt //========== //Layer methods - @Override - public Gradient error(INDArray errorSignal) { - throw new UnsupportedOperationException(); - } - @Override public Type type() { return Type.MULTILAYER; } - @Override - public INDArray derivativeActivation(INDArray input) { - throw new UnsupportedOperationException(); - } - - @Override - public Gradient calcGradient(Gradient layerError, INDArray activation) { - throw new UnsupportedOperationException(); - } - @Override public INDArray preOutput(INDArray x) { INDArray lastLayerActivation = x; @@ -2562,6 +2501,26 @@ public int getIndex() { return layerIndex; } + @Override + public int getIterationCount() { + return 
getLayerWiseConfigurations().getIterationCount(); + } + + @Override + public int getEpochCount() { + return getLayerWiseConfigurations().getEpochCount(); + } + + @Override + public void setIterationCount(int iterationCount) { + getLayerWiseConfigurations().setIterationCount(iterationCount); + } + + @Override + public void setEpochCount(int epochCount) { + getLayerWiseConfigurations().setEpochCount(epochCount); + } + @Override public double calcL2(boolean backpropParamsOnly) { double l2 = 0.0; @@ -3067,7 +3026,7 @@ public String summary(InputType inputType) { paramShape = ""; in = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNIn()); out = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNOut()); - Set paraNames = currentLayer.conf().getLearningRateByParam().keySet(); + Set paraNames = currentLayer.paramTable().keySet(); for (String aP : paraNames) { String paramS = ArrayUtils.toString(currentLayer.paramTable().get(aP).shape()); paramShape += aP + ":" + paramS + ", "; @@ -3124,6 +3083,17 @@ public void incrementEpochCount(){ layerWiseConfigurations.setEpochCount(layerWiseConfigurations.getEpochCount() + 1); } + + protected void synchronizeIterEpochCounts(){ + //TODO: this is necessary for some schedules - but the redundant values are a little ugly... + int currIter = getIterationCount(); + int currEpoch = getEpochCount(); + for(Layer l : layers){ + l.setIterationCount(currIter); + l.setEpochCount(currEpoch); + } + } + /** * Indicates whether some other object is "equal to" this one. *
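Editor's note (not part of the patch): the weight-noise handling introduced above (getParamWithNoise in the LSTM, RNN output and VAE layers) follows one lifecycle: during training a noise-perturbed view of a parameter is generated once, cached so that the forward pass and backprop see identical values, and cleared after the gradient is computed (or via clearNoiseWeightParams(), which MultiLayerNetwork simply forwards to every layer). A condensed, self-contained sketch of that lifecycle follows; the class and method names are illustrative, not DL4J API.

import java.util.HashMap;
import java.util.Map;
import java.util.function.UnaryOperator;

// Condensed sketch of the parameter-noise caching lifecycle used by getParamWithNoise(...).
// ParamNoiseCache and applyNoise are illustrative names, not DL4J classes.
class ParamNoiseCache {
    private final Map<String, double[]> cache = new HashMap<>();

    double[] getWithNoise(String key, double[] cleanParam, boolean training,
                          UnaryOperator<double[]> applyNoise) {
        if (!training) {
            return applyNoise.apply(cleanParam);          // inference: no caching required
        }
        // Training: generate the noisy view once and reuse it for forward pass and backprop
        return cache.computeIfAbsent(key, k -> applyNoise.apply(cleanParam));
    }

    void clear() {
        cache.clear();                                    // called after backprop, like clearNoiseWeightParams()
    }
}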

diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java index 9233e69fd680..fc398609c0be 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java @@ -9,7 +9,10 @@ import org.nd4j.linalg.api.rng.distribution.Distribution; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.*; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; /** * Parameter initializer for the Variational Autoencoder model. diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java index cabece573850..5b2793739db6 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java @@ -8,8 +8,11 @@ import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.distribution.Distribution; +import org.deeplearning4j.nn.conf.dropout.Dropout; +import org.deeplearning4j.nn.conf.dropout.IDropout; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.stepfunctions.StepFunction; +import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; import org.deeplearning4j.nn.weights.WeightInit; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; @@ -20,7 +23,6 @@ import java.io.IOException; import java.util.List; -import java.util.Map; /** * Created by Alex on 21/02/2017. 
@@ -37,46 +39,23 @@ public class FineTuneConfiguration { protected WeightInit weightInit; protected Double biasInit; protected Distribution dist; - protected Double learningRate; - protected Double biasLearningRate; - protected Map learningRateSchedule; - protected Double lrScoreBasedDecay; protected Double l1; protected Double l2; protected Double l1Bias; protected Double l2Bias; - protected Double dropOut; - @Deprecated - protected Updater updater; + protected IDropout dropout; + protected IWeightNoise weightNoise; protected IUpdater iUpdater; - @Deprecated - protected Double momentum; - @Deprecated - protected Map momentumSchedule; - @Deprecated - protected Double epsilon; - @Deprecated - protected Double rho; - @Deprecated - protected Double rmsDecay; - @Deprecated - protected Double adamMeanDecay; - @Deprecated - protected Double adamVarDecay; + protected IUpdater biasUpdater; protected Boolean miniBatch; protected Integer numIterations; protected Integer maxNumLineSearchIterations; protected Long seed; protected OptimizationAlgorithm optimizationAlgo; protected StepFunction stepFunction; - protected Boolean useDropConnect; protected Boolean minimize; protected GradientNormalization gradientNormalization; protected Double gradientNormalizationThreshold; - protected LearningRatePolicy learningRatePolicy; - protected Double lrPolicyDecayRate; - protected Double lrPolicySteps; - protected Double lrPolicyPower; protected ConvolutionMode convolutionMode; protected List constraints; protected Boolean hasBiasConstraints; @@ -105,19 +84,15 @@ public Builder seed(long seed) { return this; } - /** - * @deprecated No longer used - */ - @Deprecated - public Builder regularization(boolean regularization) { - return this; - } - public Builder iterations(int iterations) { this.numIterations = iterations; return this; } + public Builder dropOut(double dropout){ + return dropout(new Dropout(dropout)); + } + public Builder activation(Activation activation) { this.activationFn = activation.getActivationFunction(); return this; @@ -127,8 +102,8 @@ public Builder updater(IUpdater updater) { return iUpdater(updater); } + @Deprecated public Builder updater(Updater updater) { - this.updater = updater; return updater(updater.getIUpdaterWithDefaultConfig()); } } @@ -147,13 +122,12 @@ public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) { WeightInit origWeightInit = null; if (l != null) { - if (dropOut != null) - l.setDropOut(dropOut); + if (dropout != null) + l.setIDropout(dropout); } if (l != null && l instanceof BaseLayer) { BaseLayer bl = (BaseLayer) l; - originalUpdater = bl.getUpdater(); origWeightInit = bl.getWeightInit(); if (activationFn != null) bl.setActivationFn(activationFn); @@ -163,18 +137,6 @@ public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) { bl.setBiasInit(biasInit); if (dist != null) bl.setDist(dist); - if (learningRate != null) { - //usually the same learning rate is applied to both bias and weights - //so always overwrite the learning rate to both? 
- bl.setLearningRate(learningRate); - bl.setBiasLearningRate(learningRate); - - } - if (biasLearningRate != null) - bl.setBiasLearningRate(biasLearningRate); - if (learningRateSchedule != null) - bl.setLearningRateSchedule(learningRateSchedule); - // if(lrScoreBasedDecay != null) if (l1 != null) bl.setL1(l1); if (l2 != null) @@ -183,28 +145,19 @@ public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) { bl.setL1Bias(l1Bias); if (l2Bias != null) bl.setL2Bias(l2Bias); - if (updater != null) - bl.setUpdater(updater); - if (iUpdater != null) - bl.setIUpdater(iUpdater); - if (momentum != null) - bl.setMomentum(momentum); - if (momentumSchedule != null) - bl.setMomentum(momentum); - if (epsilon != null) - bl.setEpsilon(epsilon); - if (rho != null) - bl.setRho(rho); - if (rmsDecay != null) - bl.setRmsDecay(rmsDecay); - if (adamMeanDecay != null) - bl.setAdamMeanDecay(adamMeanDecay); - if (adamVarDecay != null) - bl.setAdamVarDecay(adamVarDecay); if (gradientNormalization != null) bl.setGradientNormalization(gradientNormalization); if (gradientNormalizationThreshold != null) bl.setGradientNormalizationThreshold(gradientNormalizationThreshold); + if (iUpdater != null){ + bl.setIUpdater(iUpdater); + } + if (biasUpdater != null){ + bl.setBiasUpdater(biasUpdater); + } + if (weightNoise != null){ + bl.setWeightNoise(weightNoise); + } } if (miniBatch != null) nnc.setMiniBatch(miniBatch); @@ -218,16 +171,8 @@ public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) { nnc.setOptimizationAlgo(optimizationAlgo); if (stepFunction != null) nnc.setStepFunction(stepFunction); - if (useDropConnect != null) - nnc.setUseDropConnect(useDropConnect); if (minimize != null) nnc.setMinimize(minimize); - if (learningRatePolicy != null) - nnc.setLearningRatePolicy(learningRatePolicy); - if (lrPolicySteps != null) - nnc.setLrPolicySteps(lrPolicySteps); - if (lrPolicyPower != null) - nnc.setLrPolicyPower(lrPolicyPower); if (convolutionMode != null && l instanceof ConvolutionLayer) { ((ConvolutionLayer) l).setConvolutionMode(convolutionMode); @@ -236,47 +181,6 @@ public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) { ((SubsamplingLayer) l).setConvolutionMode(convolutionMode); } - //Check the updater config. If we change updaters, we want to remove the old config to avoid warnings - if (l != null && l instanceof BaseLayer && updater != null && originalUpdater != null - && updater != originalUpdater) { - BaseLayer bl = (BaseLayer) l; - switch (originalUpdater) { - case ADAM: - case ADAMAX: - if (adamMeanDecay == null) - bl.setAdamMeanDecay(Double.NaN); - if (adamVarDecay == null) - bl.setAdamVarDecay(Double.NaN); - break; - case ADADELTA: - if (rho == null) - bl.setRho(Double.NaN); - if (epsilon == null) - bl.setEpsilon(Double.NaN); - break; - case NESTEROVS: - if (momentum == null) - bl.setMomentum(Double.NaN); - if (momentumSchedule == null) - bl.setMomentumSchedule(null); - if (epsilon == null) - bl.setEpsilon(Double.NaN); - break; - case ADAGRAD: - if (epsilon == null) - bl.setEpsilon(Double.NaN); - break; - case RMSPROP: - if (rmsDecay == null) - bl.setRmsDecay(Double.NaN); - if (epsilon == null) - bl.setEpsilon(Double.NaN); - break; - - //Other cases: no changes required - } - } - //Check weight init. 
Remove dist if originally was DISTRIBUTION, and isn't now -> remove no longer needed distribution if (l != null && l instanceof BaseLayer && origWeightInit == WeightInit.DISTRIBUTION && weightInit != null && weightInit != WeightInit.DISTRIBUTION) { @@ -285,12 +189,7 @@ public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) { //Perform validation. This also sets the defaults for updaters. For example, Updater.RMSProp -> set rmsDecay if (l != null) { - LayerValidation.updaterValidation(l.getLayerName(), l, learningRate, momentum, momentumSchedule, - adamMeanDecay, adamVarDecay, rho, rmsDecay, epsilon); - - boolean useDropCon = (useDropConnect == null ? nnc.isUseDropConnect() : useDropConnect); - LayerValidation.generalValidation(l.getLayerName(), l, useDropCon, dropOut, l2, l2Bias, l1, l1Bias, - dist, constraints, null, null); + LayerValidation.generalValidation(l.getLayerName(), l, dropout, l2, l2Bias, l1, l1Bias, dist, constraints, null, null); } //Also: update the LR, L1 and L2 maps, based on current config (which might be different to original config) @@ -337,16 +236,6 @@ public NeuralNetConfiguration.Builder appliedNeuralNetConfigurationBuilder() { confBuilder.setBiasInit(biasInit); if (dist != null) confBuilder.setDist(dist); - if (learningRate != null) { - //usually the same learning rate is applied to both bias and weights - //HOWEVER: this is set elsewhere. in the NNC, we only want to override the normal LR - confBuilder.setLearningRate(learningRate); - } - if (biasLearningRate != null) - confBuilder.setBiasLearningRate(biasLearningRate); - if (learningRateSchedule != null) - confBuilder.setLearningRateSchedule(learningRateSchedule); - // if(lrScoreBasedDecay != null) if (l1 != null) confBuilder.setL1(l1); if (l2 != null) @@ -355,26 +244,12 @@ public NeuralNetConfiguration.Builder appliedNeuralNetConfigurationBuilder() { confBuilder.setL1Bias(l1Bias); if (l2Bias != null) confBuilder.setL2Bias(l2Bias); - if (dropOut != null) - confBuilder.setDropOut(dropOut); + if (dropout != null) + confBuilder.setIdropOut(dropout); if (iUpdater != null) confBuilder.updater(iUpdater); - if (updater != null) - confBuilder.setUpdater(updater); - if (momentum != null) - confBuilder.setMomentum(momentum); - if (momentumSchedule != null) - confBuilder.setMomentum(momentum); - if (epsilon != null) - confBuilder.setEpsilon(epsilon); - if (rho != null) - confBuilder.setRho(rho); - if (rmsDecay != null) - confBuilder.setRmsDecay(rmsDecay); - if (adamMeanDecay != null) - confBuilder.setAdamMeanDecay(adamMeanDecay); - if (adamVarDecay != null) - confBuilder.setAdamVarDecay(adamVarDecay); + if(biasUpdater != null) + confBuilder.biasUpdater(biasUpdater); if (miniBatch != null) confBuilder.setMiniBatch(miniBatch); if (numIterations != null) @@ -387,21 +262,12 @@ public NeuralNetConfiguration.Builder appliedNeuralNetConfigurationBuilder() { confBuilder.setOptimizationAlgo(optimizationAlgo); if (stepFunction != null) confBuilder.setStepFunction(stepFunction); - if (useDropConnect != null) - confBuilder.setUseDropConnect(useDropConnect); if (minimize != null) confBuilder.setMinimize(minimize); if (gradientNormalization != null) confBuilder.setGradientNormalization(gradientNormalization); if (gradientNormalizationThreshold != null) confBuilder.setGradientNormalizationThreshold(gradientNormalizationThreshold); - if (learningRatePolicy != null) - confBuilder.setLearningRatePolicy(learningRatePolicy); - if (lrPolicySteps != null) - confBuilder.setLrPolicySteps(lrPolicySteps); - if (lrPolicyPower != null) 
- confBuilder.setLrPolicyPower(lrPolicyPower); - return confBuilder; } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java index 12b985780720..76a7113a79f2 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java @@ -295,12 +295,10 @@ public MultiLayerNetwork build() { origNNC.variables(false).add(s); origNNC.getL1ByParam().put(s, 0.0); origNNC.getL2ByParam().put(s, 0.0); - origNNC.getLearningRateByParam().put(s, 0.0); layerNNC.variables(false).add(s); layerNNC.getL1ByParam().put(s, 0.0); layerNNC.getL2ByParam().put(s, 0.0); - layerNNC.getLearningRateByParam().put(s, 0.0); } } @@ -817,7 +815,6 @@ public ComputationGraph build() { newNNC.variables(false).add(s); newNNC.getL1ByParam().put(s, 0.0); newNNC.getL2ByParam().put(s, 0.0); - newNNC.getLearningRateByParam().put(s, 0.0); } //We also need to place the layer in the CompGraph Layer[] (replacing the old one) diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java index 1dc99154086c..fdeac82739c1 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java @@ -15,9 +15,7 @@ import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.api.ops.impl.accum.Norm2; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.indexing.BooleanIndexing; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.linalg.indexing.conditions.Conditions; import java.util.ArrayList; import java.util.HashMap; @@ -78,7 +76,7 @@ public BaseMultiLayerUpdater(T network, INDArray updaterState) { for (int j = 0; j < variables.size(); j++) { String var = variables.get(j); int paramSizeThisVariable = layerParamTable.get(var).length(); - int updaterStateSizeThisVariable = (int) layers[i].conf().getLayer().getIUpdaterByParam(var) + int updaterStateSizeThisVariable = (int) layers[i].conf().getLayer().getUpdaterByParam(var) .stateSize(paramSizeThisVariable); INDArray gradientViewSubset = null; @@ -207,8 +205,8 @@ public INDArray getStateViewArray() { } @Override - public void update(Layer layer, Gradient gradient, int iteration, int batchSize) { - update(gradient, iteration, batchSize); + public void update(Layer layer, Gradient gradient, int iteration, int epoch, int batchSize) { + update(gradient, iteration, epoch, batchSize); } /** @@ -222,7 +220,7 @@ public void update(Layer layer, Gradient gradient, int iteration, int batchSize) * @param iteration The current iteration (i.e., number of parameter updates so far) * @param batchSize The current minibatch size (number of examples) */ - public void update(Gradient gradient, int iteration, int batchSize) { + public void update(Gradient gradient, int iteration, int epoch, int batchSize) { //First: check if gradient is standard or external... 
//In a MultiLayerNetwork, the INDArray returned by .gradient() is always the standard full view array @@ -277,19 +275,19 @@ public void update(Gradient gradient, int iteration, int batchSize) { .getAndActivateWorkspace(ComputationGraph.workspaceFeedForward)) { if (isExternal) { //RL4J etc type case: calculate gradients in 1 net, update them in another - ub.updateExternalGradient(iteration, gradient.gradient(), getParams()); + ub.updateExternalGradient(iteration, epoch, gradient.gradient(), getParams()); } else { //Standard case - ub.update(iteration); + ub.update(iteration, epoch); } } } else { if (isExternal) { //RL4J etc type case: calculate gradients in 1 net, update them in another - ub.updateExternalGradient(iteration, gradient.gradient(), getParams()); + ub.updateExternalGradient(iteration, epoch, gradient.gradient(), getParams()); } else { //Standard case - ub.update(iteration); + ub.update(iteration, epoch); } } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java index 9ef2491e5c5d..6f17f3acaa2d 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java @@ -3,14 +3,12 @@ import lombok.AllArgsConstructor; import lombok.Data; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.LearningRatePolicy; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.learning.GradientUpdater; -import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.ops.transforms.Transforms; import java.util.ArrayList; @@ -73,7 +71,7 @@ public void init() { if (gradientUpdater == null) { ParamState varState = layersAndVariablesInBlock.get(0); String varName = varState.getParamName(); - gradientUpdater = varState.getLayer().conf().getLayer().getIUpdaterByParam(varName).instantiate(updaterView, + gradientUpdater = varState.getLayer().conf().getLayer().getUpdaterByParam(varName).instantiate(updaterView, updaterViewRequiresInitialization); //UpdaterUtils.getGradientUpdater(varState.getLayer(), varState.getParamName()); } } @@ -103,17 +101,17 @@ public GradientUpdater getGradientUpdater() { * * @param iteration The current iteration (i.e., total number of parameter updates so far) */ - public void update(int iteration) { - update(iteration, false, gradientView, null); + public void update(int iteration, int epoch) { + update(iteration, epoch, false, gradientView, null); } - public void updateExternalGradient(int iteration, INDArray fullNetworkGradientView, + public void updateExternalGradient(int iteration, int epoch, INDArray fullNetworkGradientView, INDArray fullNetworkParamsArray) { //Extract the relevant subset from the external network - update(iteration, true, fullNetworkGradientView, fullNetworkParamsArray); + update(iteration, epoch, true, fullNetworkGradientView, fullNetworkParamsArray); } - private void update(int iteration, boolean externalGradient, INDArray fullNetworkGradientView, + private void update(int iteration, int epoch, boolean externalGradient, INDArray fullNetworkGradientView, INDArray fullNetworkParamsArray) { //Initialize the updater, if necessary if (gradientUpdater == null) { @@ -138,14 +136,9 @@ private void 
update(int iteration, boolean externalGradient, INDArray fullNetwor //No params for this layer return; } - BaseLayer baseLayer = (BaseLayer) l0.conf().getLayer(); - LearningRatePolicy lrPolicy = l0.conf().getLearningRatePolicy(); - if (lrPolicy != LearningRatePolicy.None || baseLayer.getIUpdater() instanceof Nesterovs) { - applyLrDecayPolicy(lrPolicy, iteration); - } //Apply the updater itself - gradientUpdater.applyUpdater(blockGradViewArray, iteration); + gradientUpdater.applyUpdater(blockGradViewArray, iteration, epoch); //Post apply: l1 and l2 by params for (ParamState p : layersAndVariablesInBlock) { @@ -190,88 +183,4 @@ public void postApply(Layer layer, String paramName, INDArray gradientView, INDA gradientView.addi(Transforms.sign(paramsView, true).muli(conf.getL1ByParam(paramName))); } } - - /** - * Apply learning rate decay, based on the configuration - * - * @param decay Learning rate schedule enumeration - * @param iteration Current iteration - */ - public void applyLrDecayPolicy(LearningRatePolicy decay, int iteration) { - Layer layer = layersAndVariablesInBlock.get(0).getLayer(); - String variable = layersAndVariablesInBlock.get(0).getParamName(); - - NeuralNetConfiguration conf = layer.conf(); - double decayRate = layer.conf().getLrPolicyDecayRate(); - double lr = conf.getLearningRateByParam(variable); - - if (!(conf.getLayer() instanceof BaseLayer)) { - //No params - return; - } - - BaseLayer baseLayer = (BaseLayer) conf.getLayer(); - - double newLr; - switch (decay) { - case Exponential: - newLr = lr * Math.pow(decayRate, iteration); - break; - case Inverse: - newLr = lr / Math.pow((1 + decayRate * iteration), conf.getLrPolicyPower()); - break; - case Step: - newLr = lr * Math.pow(decayRate, Math.floor(iteration / conf.getLrPolicySteps())); - break; - case TorchStep: - if (iteration > 1 && conf.getLrPolicySteps() % iteration == 0) { - newLr = lr * decayRate; - } else { - newLr = lr; - } - break; - case Poly: - newLr = lr * Math.pow((1 - ((double) iteration) / conf.getNumIterations()), conf.getLrPolicyPower()); - break; - case Sigmoid: - newLr = lr / (1 + Math.exp(-decayRate * (iteration - conf.getLrPolicySteps()))); - break; - case Schedule: - if (baseLayer.getLearningRateSchedule().containsKey(iteration)) { - newLr = baseLayer.getLearningRateSchedule().get(iteration); - } else { - newLr = lr; - } - break; - case None: - case Score: - newLr = lr; - break; - default: - throw new RuntimeException("Unknown Learning rate decay value: " + decay); - } - - //Handle momentum schedules. Given the new updater design, this change is purely cosmetic - double newMomentum = 0.0; - if (baseLayer.getIUpdater() instanceof Nesterovs) { - if (baseLayer.getMomentumSchedule() != null && baseLayer.getMomentumSchedule().containsKey(iteration)) { - newMomentum = baseLayer.getMomentumSchedule().get(iteration); - } else { - newMomentum = baseLayer.getMomentum(); - } - } - - //Need to set the LR for *all* variables in the Updater block. All variables (by definition of being in the - // same block) share the same LR schedule - for (ParamState vs : layersAndVariablesInBlock) { - vs.getLayer().conf().setLearningRateByParam(vs.getParamName(), newLr); - if (((BaseLayer) layer.conf().getLayer()).getIUpdater() instanceof Nesterovs) { - ((BaseLayer) vs.getLayer().conf().getLayer()).setMomentum(newMomentum); - } - } - - //Apply the new LR according to the schedule. 
- //Note: momentum schedules are applied internally in the Nesterov config object applySchedules method - gradientUpdater.getConfig().applySchedules(iteration, newLr); - } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterUtils.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterUtils.java index a4764bbf3881..9008ea54d9cc 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterUtils.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterUtils.java @@ -1,12 +1,8 @@ package org.deeplearning4j.nn.updater; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.LearningRatePolicy; -import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.nd4j.linalg.learning.config.IUpdater; -import java.util.Objects; - /** * Created by Alex on 14/04/2017. */ @@ -16,19 +12,16 @@ public class UpdaterUtils { public static boolean updaterConfigurationsEquals(Layer layer1, String param1, Layer layer2, String param2) { org.deeplearning4j.nn.conf.layers.Layer l1 = layer1.conf().getLayer(); org.deeplearning4j.nn.conf.layers.Layer l2 = layer2.conf().getLayer(); - IUpdater u1 = l1.getIUpdaterByParam(param1); - IUpdater u2 = l2.getIUpdaterByParam(param2); - if (!u1.equals(u2)) { - //Different updaters or different config - return false; - } + IUpdater u1 = l1.getUpdaterByParam(param1); + IUpdater u2 = l2.getUpdaterByParam(param2); + //For updaters to be equal (and hence combinable), we require that: - //(a) The updater-specific configurations are equal (inc. LR) - //(b) The learning rate *schedules* are equal - //(c) If one or more of the params are pretrainable params, they are in the same layer + //(a) The updater-specific configurations are equal (inc. 
LR, LR/momentum schedules etc) + //(b) If one or more of the params are pretrainable params, they are in the same layer // This last point is necessary as we don't want to modify the pretrain gradient/updater state during // backprop, or modify the pretrain gradient/updater state of one layer while training another - if (!lrSchedulesEqual(layer1, param1, layer2, param2)) { + if (!u1.equals(u2)) { + //Different updaters or different config return false; } @@ -43,61 +36,4 @@ public static boolean updaterConfigurationsEquals(Layer layer1, String param1, L return true; } - - public static boolean lrSchedulesEqual(Layer layer1, String param1, Layer layer2, String param2) { - - LearningRatePolicy lp1 = layer1.conf().getLearningRatePolicy(); - LearningRatePolicy lp2 = layer2.conf().getLearningRatePolicy(); - - if (lp1 != lp2) { - return false; - } - - double lr1 = layer1.conf().getLearningRateByParam(param1); - double lr2 = layer2.conf().getLearningRateByParam(param2); - if (lr1 != lr2) { - return false; - } - - double dr1 = layer1.conf().getLrPolicyDecayRate(); - double dr2 = layer2.conf().getLrPolicyDecayRate(); - - boolean lrConfigEqual; - switch (lp1) { - case None: - lrConfigEqual = true; - break; - case Exponential: - lrConfigEqual = dr1 == dr2; - break; - case Inverse: - lrConfigEqual = dr1 == dr2 && layer1.conf().getLrPolicyPower() == layer2.conf().getLrPolicyPower(); - break; - case Poly: - lrConfigEqual = layer1.conf().getLrPolicyPower() == layer2.conf().getLrPolicyPower(); - break; - case Sigmoid: - lrConfigEqual = dr1 == dr2 && layer1.conf().getLrPolicySteps() == layer2.conf().getLrPolicySteps(); - break; - case Step: - lrConfigEqual = dr1 == dr2 && layer1.conf().getLrPolicySteps() == layer2.conf().getLrPolicySteps(); - break; - case TorchStep: - lrConfigEqual = layer1.conf().getLrPolicyPower() == layer2.conf().getLrPolicyPower(); - break; - case Schedule: - BaseLayer bl1 = (BaseLayer) layer1.conf().getLayer(); - BaseLayer bl2 = (BaseLayer) layer2.conf().getLayer(); - lrConfigEqual = Objects.equals(bl1.getLearningRateSchedule(), bl2.getLearningRateSchedule()); - break; - case Score: - //TODO - might be ok sometimes?? 
- lrConfigEqual = false; - break; - default: - throw new UnsupportedOperationException("Unknown learning rate schedule: " + lp1); - } - - return lrConfigEqual; - } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java index 58b5b69c28be..5062308b83eb 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java @@ -18,7 +18,6 @@ package org.deeplearning4j.optimize.api; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -26,6 +25,7 @@ import org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater; import org.deeplearning4j.optimize.solvers.accumulation.GradientsAccumulator; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.io.Serializable; import java.util.Collection; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java index b04d71e8a202..d34b11e0622f 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java @@ -1,8 +1,8 @@ package org.deeplearning4j.optimize.listeners; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.optimize.api.IterationListener; +import org.nd4j.linalg.primitives.Pair; import java.io.File; import java.io.FileOutputStream; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java index d49269f1809f..f909e80993fb 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java @@ -19,13 +19,11 @@ package org.deeplearning4j.optimize.solvers; import lombok.Getter; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.exception.InvalidStepException; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.LearningRatePolicy; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; @@ -42,6 +40,7 @@ import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -276,10 +275,6 @@ public boolean checkTerminalConditions(INDArray gradient, double oldScore, doubl if (condition.terminate(score, oldScore, new Object[] {gradient})) { log.debug("Hit termination condition on iteration {}: score={}, oldScore={}, condition={}", i, score, oldScore, condition); - if (condition instanceof EpsTermination && conf.getLayer() != null - && 
conf.getLearningRatePolicy() == LearningRatePolicy.Score) { - model.applyLearningRateScoreDecay(); - } return true; } } @@ -323,7 +318,7 @@ public void updateGradientAccordingToParams(Gradient gradient, Model model, int computationGraphUpdater = new ComputationGraphUpdater(graph); } } - computationGraphUpdater.update(gradient, getIterationCount(model), batchSize); + computationGraphUpdater.update(gradient, getIterationCount(model), getEpochCount(model), batchSize); } else { if (updater == null) { try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { @@ -332,7 +327,7 @@ public void updateGradientAccordingToParams(Gradient gradient, Model model, int } Layer layer = (Layer) model; - updater.update(layer, gradient, getIterationCount(model), batchSize); + updater.update(layer, gradient, getIterationCount(model), getEpochCount(model), batchSize); } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java index fc1da60008a0..0d279279173e 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java @@ -18,7 +18,6 @@ package org.deeplearning4j.optimize.solvers; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; @@ -27,6 +26,7 @@ import org.deeplearning4j.optimize.api.TerminationCondition; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.util.Collection; import java.util.Iterator; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java index 0a08882cec6d..fa9e7885053c 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java @@ -19,7 +19,6 @@ package org.deeplearning4j.optimize.solvers; import lombok.extern.slf4j.Slf4j; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; @@ -29,6 +28,7 @@ import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.util.Collection; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Dropout.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Dropout.java deleted file mode 100644 index 54c95507600b..000000000000 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Dropout.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.deeplearning4j.util; - -import org.deeplearning4j.nn.api.Layer; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.impl.transforms.LegacyDropOut; -import org.nd4j.linalg.api.ops.impl.transforms.LegacyDropOutInverted; -import org.nd4j.linalg.api.ops.random.impl.DropOut; -import org.nd4j.linalg.api.ops.random.impl.DropOutInverted; -import org.nd4j.linalg.factory.Nd4j; - - -/** - * @author Adam Gibson - */ -public class Dropout { - - private Dropout() {} - - 
/** - * Apply drop connect to the given variable - * @param layer the layer with the variables - * @param variable the variable to apply - * @return the post applied drop connect - */ - public static INDArray applyDropConnect(Layer layer, String variable) { - INDArray result = layer.getParam(variable).dup(); - if (Nd4j.getRandom().getStatePointer() != null) { - Nd4j.getExecutioner().exec(new DropOut(result, result, layer.conf().getLayer().getDropOut())); - } else { - Nd4j.getExecutioner().exec(new LegacyDropOut(result, result, layer.conf().getLayer().getDropOut())); - } - return result; - } - - /** - * Apply dropout to the given input - * and return the drop out mask used - * @param input the input to do drop out on - * @param dropout the drop out probability - */ - public static void applyDropout(INDArray input, double dropout) { - if (Nd4j.getRandom().getStatePointer() != null) { - Nd4j.getExecutioner().exec(new DropOutInverted(input, dropout)); - } else { - Nd4j.getExecutioner().exec(new LegacyDropOutInverted(input, dropout)); - } - } - - -} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/InputSplit.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/InputSplit.java index 1c9e3ea86d0c..9b71dbfb48bd 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/InputSplit.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/InputSplit.java @@ -18,8 +18,8 @@ package org.deeplearning4j.util; -import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.util.ArrayList; import java.util.List; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/LayerValidation.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/LayerValidation.java deleted file mode 100644 index e8648f7cc167..000000000000 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/LayerValidation.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.deeplearning4j.util; - -import org.deeplearning4j.exception.DL4JInvalidConfigException; - -/** - * Created by Alex on 12/11/2016. 
- */ -public class LayerValidation { - - /** - * Asserts that the layer nIn and nOut values are set for the layer - * - * @param layerType Type of layer ("DenseLayer", etc) - * @param layerName Name of the layer (may be null if not set) - * @param layerIndex Index of the layer - * @param nIn nIn value - * @param nOut nOut value - */ - public static void assertNInNOutSet(String layerType, String layerName, int layerIndex, int nIn, int nOut) { - if (nIn <= 0 || nOut <= 0) { - if (layerName == null) - layerName = "(name not set)"; - throw new DL4JInvalidConfigException(layerType + " (index=" + layerIndex + ", name=" + layerName + ") nIn=" - + nIn + ", nOut=" + nOut + "; nIn and nOut must be > 0"); - } - } -} diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Viterbi.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Viterbi.java index bdadc88f4181..0ba3cc53a39b 100755 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Viterbi.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/Viterbi.java @@ -19,9 +19,9 @@ package org.deeplearning4j.util; import org.apache.commons.math3.util.FastMath; -import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.io.Serializable; diff --git a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper-parameter-server/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper-parameter-server/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java index fbfa08a698d0..c4cdeb05f7f7 100644 --- a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper-parameter-server/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java +++ b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper-parameter-server/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java @@ -2,10 +2,8 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -17,6 +15,7 @@ import org.junit.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; /** @@ -42,11 +41,9 @@ public void testWrapper() throws Exception { log.info("Build model...."); MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) - .regularization(true).l2(0.0005).learningRate(0.01)//.biasLearningRate(0.02) - //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) + .l2(0.0005) .weightInit(WeightInit.XAVIER) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NESTEROVS) - .momentum(0.9).list() + .updater(new Nesterovs(0.01, 0.9)).list() .layer(0, new ConvolutionLayer.Builder(5, 5) //nIn 
and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) diff --git a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java index 8abfa0e81bf7..2dd47b3b7fb4 100644 --- a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java +++ b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java @@ -3,7 +3,6 @@ import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import org.datavec.api.util.ClassPathResource; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.eval.Evaluation; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -19,6 +18,7 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.io.File; import java.util.Arrays; diff --git a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java index 65070223b944..edd9b4258100 100644 --- a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java +++ b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java @@ -2,10 +2,8 @@ import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.eval.Evaluation; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -19,6 +17,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,11 +47,10 @@ public void testParallelWrapperRun() throws Exception { log.info("Build model...."); MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) - .regularization(true).l2(0.0005).learningRate(0.01)//.biasLearningRate(0.02) + .l2(0.0005) //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) .weightInit(WeightInit.XAVIER) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NESTEROVS) - .momentum(0.9).list() + .updater(new Nesterovs(0.01, 0.9)).list() .layer(0, new ConvolutionLayer.Builder(5, 5) //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) diff --git a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java index aabf4829e7e6..547279fbca77 100644 --- a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java +++ b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java @@ -13,7 +13,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; @@ -22,6 +21,7 @@ import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.concurrent.TimeUnit; @@ -82,7 +82,7 @@ public class TestParallelEarlyStopping { public void testEarlyStoppingEveryNEpoch() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .pretrain(false).backprop(true).build(); @@ -114,7 +114,7 @@ public void testBadTuning() { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(1.0) //Intentionally huge LR + .updater(new Sgd(1.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) diff --git a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java index 2212a1a8bafc..d9a8229a9474 100644 --- a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java +++ b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java @@ -12,7 +12,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import 
org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -23,6 +22,7 @@ import org.junit.Ignore; import org.junit.Test; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import static org.junit.Assert.assertEquals; @@ -37,7 +37,7 @@ public void testParallelStatsListenerCompatibility() throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) .layer(1, new OutputLayer.Builder().nIn(3).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) diff --git a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java index 2ade1dfcfbcb..1bab6b2b5544 100644 --- a/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java +++ b/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java @@ -2,10 +2,8 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -17,6 +15,7 @@ import org.junit.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.io.File; @@ -46,11 +45,9 @@ public void runParallelWrapperMain() throws Exception { log.info("Build model...."); MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) - .regularization(true).l2(0.0005).learningRate(0.01)//.biasLearningRate(0.02) - //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) + .l2(0.0005) .weightInit(WeightInit.XAVIER) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NESTEROVS) - .momentum(0.9).list() + .updater(new Nesterovs(0.01, 0.9)).list() .layer(0, new ConvolutionLayer.Builder(5, 5) //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/AutoEncoderNetworkTest.java b/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/AutoEncoderNetworkTest.java index 904dee8549ef..b2e8bc66102c 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/AutoEncoderNetworkTest.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/AutoEncoderNetworkTest.java @@ -20,6 +20,8 @@ import org.junit.After; import org.junit.Assert; import org.junit.Test; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.io.File; @@ -80,7 +82,7 @@ public void closeIt() { } private MultiLayerConfiguration getNNConfiguration() { - return new NeuralNetConfiguration.Builder().seed(12345).iterations(5).learningRate(.1) + return new NeuralNetConfiguration.Builder().seed(12345).iterations(5).updater(new Sgd(0.1)) .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list() .layer(0, new RBM.Builder().nIn(10).nOut(8) .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build()) @@ -92,7 +94,7 @@ private MultiLayerConfiguration getNNConfiguration() { .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build()) .layer(4, new RBM.Builder().nIn(5).nOut(8) .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build()) //decoding starts - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation("sigmoid").nIn(8) + .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nIn(8) .nOut(10).build()) .pretrain(true).backprop(true).build(); } diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/SparkDl4jNetworkTest.java b/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/SparkDl4jNetworkTest.java index 033d819ca33a..b71e814d54b8 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/SparkDl4jNetworkTest.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-ml/src/test/java/org/deeplearning4j/spark/ml/impl/SparkDl4jNetworkTest.java @@ -11,7 +11,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -24,6 +23,8 @@ import org.junit.After; import org.junit.Assert; import org.junit.Test; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.io.File; @@ -95,12 +96,12 @@ public void closeIt() { private MultiLayerConfiguration getNNConfiguration() { return new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(10) - .weightInit(WeightInit.UNIFORM).learningRate(0.1).updater(Updater.NESTEROVS).list() + .weightInit(WeightInit.UNIFORM).updater(new Nesterovs(0.1)).list() .layer(0, new 
DenseLayer.Builder().nIn(2).nOut(100).weightInit(WeightInit.XAVIER) - .activation("relu").build()) + .activation(Activation.RELU).build()) .layer(1, new DenseLayer.Builder().nIn(100).nOut(120).weightInit(WeightInit.XAVIER) - .activation("relu").build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation("softmax").nIn(120) + .activation(Activation.RELU).build()) + .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.SOFTMAX).nIn(120) .nOut(2).build()) .pretrain(false).backprop(true).build(); } diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectors.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectors.java index 847bd2508261..5f59142c1a8b 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectors.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectors.java @@ -7,8 +7,6 @@ import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.broadcast.Broadcast; import org.apache.spark.storage.StorageLevel; -import org.nd4j.linalg.primitives.Counter; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.exception.DL4JInvalidConfigException; import org.deeplearning4j.models.embeddings.loader.VectorsConfiguration; import org.deeplearning4j.models.sequencevectors.SequenceVectors; @@ -24,6 +22,8 @@ import org.deeplearning4j.spark.models.sequencevectors.learning.SparkElementsLearningAlgorithm; import org.deeplearning4j.spark.models.sequencevectors.learning.SparkSequenceLearningAlgorithm; import org.deeplearning4j.spark.models.sequencevectors.primitives.ExtraCounter; +import org.nd4j.linalg.primitives.Counter; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.parameterserver.distributed.VoidParameterServer; import org.nd4j.parameterserver.distributed.conf.VoidConfiguration; import org.nd4j.parameterserver.distributed.enums.FaultToleranceStrategy; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/export/impl/VocabCacheExporter.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/export/impl/VocabCacheExporter.java index 92723139622c..aa70f44eefb2 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/export/impl/VocabCacheExporter.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/export/impl/VocabCacheExporter.java @@ -3,7 +3,6 @@ import lombok.Getter; import lombok.extern.slf4j.Slf4j; import org.apache.spark.api.java.JavaRDD; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; import org.deeplearning4j.models.word2vec.VocabWord; @@ -14,6 +13,7 @@ import org.deeplearning4j.spark.models.sequencevectors.export.SparkModelExporter; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.util.List; diff --git 
a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/CountFunction.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/CountFunction.java index 2fbb2077b6d6..fc8dbbe8bcea 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/CountFunction.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/CountFunction.java @@ -5,12 +5,12 @@ import org.apache.spark.Accumulator; import org.apache.spark.api.java.function.Function; import org.apache.spark.broadcast.Broadcast; -import org.nd4j.linalg.primitives.Counter; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.embeddings.loader.VectorsConfiguration; import org.deeplearning4j.models.sequencevectors.sequence.Sequence; import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement; import org.deeplearning4j.spark.models.sequencevectors.learning.SparkElementsLearningAlgorithm; +import org.nd4j.linalg.primitives.Counter; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.parameterserver.distributed.VoidParameterServer; import org.nd4j.parameterserver.distributed.conf.VoidConfiguration; import org.nd4j.parameterserver.distributed.messages.TrainingMessage; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/ExtraCountFunction.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/ExtraCountFunction.java index 691119683977..1de7521200cc 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/ExtraCountFunction.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/main/java/org/deeplearning4j/spark/models/sequencevectors/functions/ExtraCountFunction.java @@ -3,10 +3,10 @@ import lombok.NonNull; import org.apache.spark.Accumulator; import org.apache.spark.api.java.function.Function; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.sequencevectors.sequence.Sequence; import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement; import org.deeplearning4j.spark.models.sequencevectors.primitives.ExtraCounter; +import org.nd4j.linalg.primitives.Pair; /** * This accumulator function does count individual elements, using provided Accumulator diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/test/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectorsTest.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/test/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectorsTest.java index 136a59d3d71b..37d2f0344068 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/test/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectorsTest.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/test/java/org/deeplearning4j/spark/models/sequencevectors/SparkSequenceVectorsTest.java @@ -3,7 +3,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.nd4j.linalg.primitives.Counter; import org.deeplearning4j.models.sequencevectors.sequence.Sequence; import 
org.deeplearning4j.models.sequencevectors.sequence.ShallowSequenceElement; import org.deeplearning4j.models.word2vec.VocabWord; @@ -11,6 +10,7 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.nd4j.linalg.primitives.Counter; import java.util.ArrayList; import java.util.List; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/Glove.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/Glove.java index 6b25d5eabc1e..b70219e589d6 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/Glove.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/Glove.java @@ -26,9 +26,6 @@ import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.broadcast.Broadcast; -import org.nd4j.linalg.primitives.CounterMap; -import org.nd4j.linalg.primitives.Pair; -import org.nd4j.linalg.primitives.Triple; import org.deeplearning4j.models.glove.GloveWeightLookupTable; import org.deeplearning4j.models.word2vec.VocabWord; import org.deeplearning4j.models.word2vec.wordstore.VocabCache; @@ -39,6 +36,9 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.legacy.AdaGrad; +import org.nd4j.linalg.primitives.CounterMap; +import org.nd4j.linalg.primitives.Pair; +import org.nd4j.linalg.primitives.Triple; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import scala.Tuple2; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/GloveParam.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/GloveParam.java index a1e7c92b53c6..a2585282defa 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/GloveParam.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/GloveParam.java @@ -19,8 +19,8 @@ package org.deeplearning4j.spark.models.embeddings.glove; import org.apache.spark.broadcast.Broadcast; -import org.nd4j.linalg.primitives.CounterMap; import org.nd4j.linalg.api.rng.Random; +import org.nd4j.linalg.primitives.CounterMap; import java.io.Serializable; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/cooccurrences/CoOccurrenceCalculator.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/cooccurrences/CoOccurrenceCalculator.java index ae76517cc7bd..fa5650ca776d 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/cooccurrences/CoOccurrenceCalculator.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/cooccurrences/CoOccurrenceCalculator.java @@ -20,11 +20,11 @@ import org.apache.spark.api.java.function.Function; import org.apache.spark.broadcast.Broadcast; -import org.nd4j.linalg.primitives.CounterMap; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.word2vec.VocabWord; import org.deeplearning4j.models.word2vec.wordstore.VocabCache; import 
org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.CounterMap; +import org.nd4j.linalg.primitives.Pair; import java.util.List; import java.util.concurrent.atomic.AtomicLong; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/MapToPairFunction.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/MapToPairFunction.java index 52ee21574336..126677fdeafb 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/MapToPairFunction.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/MapToPairFunction.java @@ -1,9 +1,9 @@ package org.deeplearning4j.spark.models.embeddings.word2vec; import org.apache.spark.api.java.function.Function; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.word2vec.VocabWord; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Pair; import java.util.Map; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2Vec.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2Vec.java index 3bb5e58c58be..2387c81bb085 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2Vec.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2Vec.java @@ -26,7 +26,6 @@ import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.broadcast.Broadcast; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; import org.deeplearning4j.models.embeddings.loader.VectorsConfiguration; import org.deeplearning4j.models.embeddings.wordvectors.WordVectorsImpl; @@ -41,6 +40,7 @@ import org.nd4j.linalg.heartbeat.reports.Environment; import org.nd4j.linalg.heartbeat.reports.Event; import org.nd4j.linalg.heartbeat.utils.EnvironmentUtils; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformer.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformer.java index ec32ac666e74..737395fa709a 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformer.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformer.java @@ -22,12 +22,12 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.VoidFunction; import org.apache.spark.broadcast.Broadcast; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; import org.deeplearning4j.models.word2vec.VocabWord; import org.nd4j.linalg.api.buffer.DataBuffer; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; 
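Aside from the Pair/Counter import reordering in the Spark NLP files above, the substantive pattern in this patch is the one applied to the test configurations earlier in this section: builder-level learningRate/momentum calls and the Updater enum are replaced by IUpdater instances passed to updater(...), and the updater call chain now carries the current epoch (see the gradientUpdater.applyUpdater(blockGradViewArray, iteration, epoch) change in UpdaterBlock) so that schedules are handled inside the updater configuration instead of via the deleted LearningRatePolicy machinery. A minimal before/after sketch of the configuration change, assembled only from builder calls that appear in the hunks in this patch (the layer sizes are illustrative, not taken from any one test):

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class UpdaterConfigMigrationSketch {

    public static MultiLayerConfiguration newStyleConfig() {
        // Before this patch (API removed here):
        //   .learningRate(0.01).updater(Updater.NESTEROVS).momentum(0.9)
        // After this patch: learning rate and momentum are fields of the IUpdater instance itself.
        return new NeuralNetConfiguration.Builder()
                .seed(12345)
                .updater(new Nesterovs(0.01, 0.9))   // e.g. new Sgd(0.1) for plain SGD
                .weightInit(WeightInit.XAVIER)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3)
                        .activation(Activation.RELU).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(3).nOut(3).build())
                .pretrain(false).backprop(true)
                .build();
    }
}

Moving the learning rate and momentum onto the updater instance is what makes the per-parameter learning-rate maps, applyLrDecayPolicy, and lrSchedulesEqual deletions above possible: the schedule state now lives in one place, on the IUpdater configuration.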
diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformerVoid.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformerVoid.java index 65e564e3d63a..c0c57469ba52 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformerVoid.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/word2vec/Word2VecPerformerVoid.java @@ -22,12 +22,12 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.VoidFunction; import org.apache.spark.broadcast.Broadcast; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; import org.deeplearning4j.models.word2vec.VocabWord; import org.nd4j.linalg.api.buffer.DataBuffer; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/CountCumSum.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/CountCumSum.java index 7dd9bcad8a75..b446bdce330b 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/CountCumSum.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/CountCumSum.java @@ -4,8 +4,8 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.broadcast.Broadcast; -import org.nd4j.linalg.primitives.Counter; import org.deeplearning4j.spark.text.accumulators.MaxPerPartitionAccumulator; +import org.nd4j.linalg.primitives.Counter; import java.util.concurrent.atomic.AtomicLong; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/TextPipeline.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/TextPipeline.java index e0e28678edc1..68655ab343f0 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/TextPipeline.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/TextPipeline.java @@ -22,15 +22,15 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.broadcast.Broadcast; -import org.nd4j.linalg.primitives.AtomicDouble; -import org.nd4j.linalg.primitives.Counter; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.embeddings.loader.VectorsConfiguration; import org.deeplearning4j.models.word2vec.Huffman; import org.deeplearning4j.models.word2vec.VocabWord; import org.deeplearning4j.models.word2vec.wordstore.VocabCache; import org.deeplearning4j.models.word2vec.wordstore.inmemory.AbstractCache; import org.deeplearning4j.spark.text.accumulators.WordFreqAccumulator; +import org.nd4j.linalg.primitives.AtomicDouble; +import org.nd4j.linalg.primitives.Counter; +import org.nd4j.linalg.primitives.Pair; import java.util.ArrayList; import java.util.List; diff --git 
a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/WordsListToVocabWordsFunction.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/WordsListToVocabWordsFunction.java index 82db989ec556..8da5bedb9459 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/WordsListToVocabWordsFunction.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/text/functions/WordsListToVocabWordsFunction.java @@ -2,9 +2,9 @@ import org.apache.spark.api.java.function.Function; import org.apache.spark.broadcast.Broadcast; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.word2vec.VocabWord; import org.deeplearning4j.models.word2vec.wordstore.VocabCache; +import org.nd4j.linalg.primitives.Pair; import java.util.ArrayList; import java.util.List; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/models/embeddings/glove/GloveTest.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/models/embeddings/glove/GloveTest.java index 5ba5e5a550a1..18c9c870be2e 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/models/embeddings/glove/GloveTest.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/models/embeddings/glove/GloveTest.java @@ -21,7 +21,6 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.function.Function; import org.datavec.api.util.ClassPathResource; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; @@ -31,6 +30,7 @@ import org.deeplearning4j.spark.text.BaseSparkTest; import org.junit.Ignore; import org.junit.Test; +import org.nd4j.linalg.primitives.Pair; import java.util.Collection; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/text/TextPipelineTest.java b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/text/TextPipelineTest.java index ebcebb50e8be..7093d667be1c 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/text/TextPipelineTest.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/java/org/deeplearning4j/spark/text/TextPipelineTest.java @@ -23,8 +23,6 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.broadcast.Broadcast; -import org.nd4j.linalg.primitives.Counter; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.models.word2vec.Huffman; import org.deeplearning4j.models.word2vec.VocabWord; import org.deeplearning4j.models.word2vec.wordstore.VocabCache; @@ -38,6 +36,8 @@ import org.junit.Before; import org.junit.Test; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.primitives.Counter; +import org.nd4j.linalg.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import scala.Tuple2; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingWorker.java 
b/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingWorker.java index 21a46fa01678..3049f6313383 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingWorker.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingWorker.java @@ -2,7 +2,6 @@ import lombok.Getter; import org.apache.spark.broadcast.Broadcast; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -16,6 +15,7 @@ import org.deeplearning4j.spark.parameterserver.conf.SharedTrainingConfiguration; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.MultiDataSet; +import org.nd4j.linalg.primitives.Pair; /** * @author raver119@gmail.com diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/TrainingWorker.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/TrainingWorker.java index 50a0385970f6..a026471dd541 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/TrainingWorker.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/TrainingWorker.java @@ -1,11 +1,11 @@ package org.deeplearning4j.spark.api; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.spark.api.stats.SparkTrainingStats; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.MultiDataSet; +import org.nd4j.linalg.primitives.Pair; import java.io.Serializable; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerFlatMap.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerFlatMap.java index c10c7459c763..47d26d01f897 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerFlatMap.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerFlatMap.java @@ -2,7 +2,6 @@ import org.datavec.spark.functions.FlatMapFunctionAdapter; import org.datavec.spark.transform.BaseFlatMapFunctionAdaptee; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.datasets.iterator.AsyncDataSetIterator; import org.deeplearning4j.datasets.iterator.IteratorDataSetIterator; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -15,6 +14,7 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.util.Collections; import java.util.Iterator; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerMultiDataSetFlatMap.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerMultiDataSetFlatMap.java index c71b38d29023..8ec7c6fd6bee 100644 --- 
a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerMultiDataSetFlatMap.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/api/worker/ExecuteWorkerMultiDataSetFlatMap.java @@ -2,7 +2,6 @@ import org.datavec.spark.functions.FlatMapFunctionAdapter; import org.datavec.spark.transform.BaseFlatMapFunctionAdaptee; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.datasets.iterator.AsyncMultiDataSetIterator; import org.deeplearning4j.datasets.iterator.IteratorMultiDataSetIterator; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -14,6 +13,7 @@ import org.nd4j.linalg.dataset.api.MultiDataSet; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.util.Collections; import java.util.Iterator; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportDataSetsFunction.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportDataSetsFunction.java index 4d7f10c17f3a..cc8ec4c88a89 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportDataSetsFunction.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportDataSetsFunction.java @@ -21,9 +21,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.function.Function2; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.util.UIDProvider; import org.nd4j.linalg.dataset.DataSet; +import org.nd4j.linalg.primitives.Pair; import java.net.URI; import java.util.*; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportMultiDataSetsFunction.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportMultiDataSetsFunction.java index 7c2f6b7782b1..4631a10c7816 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportMultiDataSetsFunction.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/data/BatchAndExportMultiDataSetsFunction.java @@ -21,9 +21,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.function.Function2; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.util.UIDProvider; import org.nd4j.linalg.dataset.api.MultiDataSet; +import org.nd4j.linalg.primitives.Pair; import java.net.URI; import java.util.*; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java index 17833cc789ea..61ebc252adf1 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java @@ -6,7 +6,6 @@ import org.deeplearning4j.api.storage.StatsStorageRouterProvider; import org.deeplearning4j.api.storage.StorageMetaData; import org.deeplearning4j.api.storage.listener.RoutingIterationListener; 
-import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -26,6 +25,7 @@ import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.MultiDataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import java.util.ArrayList; import java.util.Collection; diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java index f5831620a410..d5cfc89ccdee 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java @@ -23,7 +23,6 @@ import org.apache.spark.api.java.JavaSparkContext; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.spark.impl.multilayer.SparkDl4jMultiLayer; import org.deeplearning4j.spark.impl.paramavg.ParameterAveragingTrainingMaster; import org.junit.After; @@ -32,6 +31,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.io.Serializable; @@ -121,8 +121,8 @@ protected int numExecutors() { } protected MultiLayerConfiguration getBasicConf() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123).updater(Updater.NESTEROVS) - .learningRate(0.1).momentum(0.9).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123) + .updater(new Nesterovs(0.1, 0.9)).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java index bc3ed892e964..097cf2b44018 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java @@ -16,7 +16,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; @@ -28,6 +27,7 @@ import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,7 +45,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { public void testEarlyStoppingIris() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() 
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .pretrain(false).backprop(true).build(); @@ -97,7 +97,7 @@ public void testBadTuning() { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(10.0) //Intentionally huge LR + .updater(new Sgd(10.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.IDENTITY) .lossFunction(LossFunctions.LossFunction.MSE).build()) @@ -134,7 +134,7 @@ public void testTimeTermination() { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(1e-6).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .pretrain(false).backprop(true).build(); @@ -177,7 +177,7 @@ public void testNoImprovementNEpochsTermination() { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(0.0).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .pretrain(false).backprop(true).build(); @@ -210,7 +210,7 @@ public void testNoImprovementNEpochsTermination() { public void testListeners() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .pretrain(false).backprop(true).build(); diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java index 44c67dc1e9a1..bcbb8b594914 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java @@ -34,7 +34,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.weights.WeightInit; @@ -48,6 +47,7 @@ import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.dataset.DataSet; 
import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,7 +66,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { public void testEarlyStoppingIris() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") + .updater(new Sgd()).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").pretrain(false).backprop(true).build(); @@ -116,7 +116,7 @@ public void testBadTuning() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(2.0) //Intentionally huge LR + .updater(new Sgd(2.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.IDENTITY) .lossFunction(LossFunctions.LossFunction.MSE).build(), "in") @@ -154,7 +154,7 @@ public void testTimeTermination() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(1e-6).weightInit(WeightInit.XAVIER).graphBuilder() + .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") @@ -199,7 +199,7 @@ public void testNoImprovementNEpochsTermination() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).learningRate(0.0).weightInit(WeightInit.XAVIER).graphBuilder() + .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") @@ -235,7 +235,7 @@ public void testNoImprovementNEpochsTermination() { public void testListeners() { ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .updater(Updater.SGD).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") + .updater(new Sgd()).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").pretrain(false).backprop(true).build(); diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java index fd224beb6330..4ca1d244206b 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java +++ 
b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java @@ -2,7 +2,9 @@ import org.apache.spark.serializer.SerializerInstance; import org.deeplearning4j.eval.*; -import org.deeplearning4j.nn.conf.*; +import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.conf.graph.*; import org.deeplearning4j.nn.conf.graph.rnn.DuplicateToTimeSeriesVertex; @@ -13,6 +15,10 @@ import org.junit.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Adam; +import org.nd4j.linalg.learning.config.Nadam; +import org.nd4j.linalg.schedule.MapSchedule; +import org.nd4j.linalg.schedule.ScheduleType; import scala.collection.JavaConversions; import java.nio.ByteBuffer; @@ -29,7 +35,7 @@ public class TestKryo extends BaseSparkKryoTest { private void testSerialization(T in, SerializerInstance si) { ByteBuffer bb = si.serialize(in, null); - T deserialized = si.deserialize(bb, null); + T deserialized = (T)si.deserialize(bb, null); assertEquals(in, deserialized); } @@ -41,19 +47,19 @@ public void testSerializationConfigurations() { //Check network configurations: Map m = new HashMap<>(); + m.put(0, 0.5); m.put(10, 0.1); - MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().learningRate(0.2) - .learningRateDecayPolicy(LearningRatePolicy.Schedule).learningRateSchedule(m) - .updater(Updater.NADAM).list().layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build()) + MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder() + .updater(new Nadam(new MapSchedule(ScheduleType.ITERATION,m))).list().layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build()) .build(); testSerialization(mlc, si); ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.DISTRIBUTION) - .dist(new UniformDistribution(-1, 1)).learningRate(0.2) - .learningRateDecayPolicy(LearningRatePolicy.Schedule) - .learningRateSchedule(Collections.singletonMap(10, 0.1)).updater(Updater.ADAM).graphBuilder() + .dist(new UniformDistribution(-1, 1)) + .updater(new Adam(new MapSchedule(ScheduleType.ITERATION,m))) + .graphBuilder() .addInputs("in").addLayer("out", new OutputLayer.Builder().nIn(10).nOut(10).build(), "in") .setOutputs("out").build(); diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java index bbb17ced8844..86fe6b65da47 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java @@ -29,6 +29,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.ArrayList; @@ -45,7 +46,7 @@ public void testSparkWithCustomLayer() { //Basic test - checks whether exceptions etc are thrown with custom layers + spark //Custom layers are tested more extensively in dl4j core MultiLayerConfiguration conf = - new 
NeuralNetConfiguration.Builder().learningRate(0.1).list() + new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new CustomLayer(3.14159)).layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java index f9e92b0e3a4d..1270d00474dd 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java @@ -36,6 +36,8 @@ import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.io.ClassPathResource; +import org.nd4j.linalg.learning.config.Nesterovs; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import scala.Tuple2; @@ -76,7 +78,7 @@ public void testBasic() throws Exception { list.add(iter.next()); ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.1) + .updater(new Sgd(0.1)) .graphBuilder().addInputs("in") .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) @@ -109,8 +111,8 @@ public void testBasic() throws Exception { @Test public void testDistributedScoring() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l1(0.1).l2(0.1) - .seed(123).updater(Updater.NESTEROVS).learningRate(0.1).momentum(0.9).graphBuilder() + ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.1) + .seed(123).updater(new Nesterovs(0.1, 0.9)).graphBuilder() .addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build(), "in") diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/layer/TestSparkLayer.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/layer/TestSparkLayer.java index b47394840bcb..817d91e440f2 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/layer/TestSparkLayer.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/layer/TestSparkLayer.java @@ -30,6 +30,7 @@ import org.junit.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.dataset.DataSet; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.List; @@ -44,7 +45,7 @@ public class TestSparkLayer extends BaseSparkTest { public void testIris2() throws Exception { NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(10) - .learningRate(1e-1) + .updater(new Sgd(0.1)) .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( LossFunctions.LossFunction.MCXENT).nIn(4).nOut(3).weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX).build()) diff --git 
a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java index 93051aafecb0..4dc11ec778df 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java @@ -2,7 +2,6 @@ import org.apache.spark.api.java.JavaRDD; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.FrozenLayer; @@ -19,6 +18,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.ArrayList; @@ -36,10 +36,10 @@ public class TestFrozenLayers extends BaseSparkTest { @Test public void testSparkFrozenLayers() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().learningRate(0.1) - .updater(Updater.SGD).activation(Activation.TANH); + NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + .activation(Activation.TANH); - FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().learningRate(0.1).build(); + FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build(); int nIn = 6; int nOut = 3; @@ -114,13 +114,13 @@ public void testSparkFrozenLayers() { @Test public void testSparkFrozenLayersCompGraph() { - FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().learningRate(0.1).build(); + FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build(); int nIn = 6; int nOut = 3; - ComputationGraph origModel = new ComputationGraph(new NeuralNetConfiguration.Builder().learningRate(0.1) - .updater(Updater.SGD).activation(Activation.TANH).graphBuilder().addInputs("in") + ComputationGraph origModel = new ComputationGraph(new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + .activation(Activation.TANH).graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(6).nOut(5).build(), "in") .addLayer("1", new DenseLayer.Builder().nIn(5).nOut(4).build(), "0") .addLayer("2", new DenseLayer.Builder().nIn(4).nOut(3).build(), "1") diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java index 3231f4a7a1d3..fba7288f2c86 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java @@ -7,7 +7,6 @@ import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import 
org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -25,6 +24,9 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.IUpdater; +import org.nd4j.linalg.learning.config.RmsProp; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.ArrayList; @@ -48,10 +50,10 @@ public void setUp() { } - private static MultiLayerConfiguration getConf(int seed, Updater updater) { + private static MultiLayerConfiguration getConf(int seed, IUpdater updater) { Nd4j.getRandom().setSeed(seed); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.5) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).iterations(1).seed(seed).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).build()) @@ -59,10 +61,10 @@ private static MultiLayerConfiguration getConf(int seed, Updater updater) { return conf; } - private static MultiLayerConfiguration getConfCNN(int seed, Updater updater) { + private static MultiLayerConfiguration getConfCNN(int seed, IUpdater updater) { Nd4j.getRandom().setSeed(seed); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.5) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).iterations(1).seed(seed).list() .layer(0, new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0) .activation(Activation.TANH).build()) @@ -74,10 +76,10 @@ private static MultiLayerConfiguration getConfCNN(int seed, Updater updater) { return conf; } - private static ComputationGraphConfiguration getGraphConf(int seed, Updater updater) { + private static ComputationGraphConfiguration getGraphConf(int seed, IUpdater updater) { Nd4j.getRandom().setSeed(seed); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.5) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).iterations(1).seed(seed).graphBuilder() .addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("1", @@ -88,10 +90,10 @@ private static ComputationGraphConfiguration getGraphConf(int seed, Updater upda return conf; } - private static ComputationGraphConfiguration getGraphConfCNN(int seed, Updater updater) { + private static ComputationGraphConfiguration getGraphConfCNN(int seed, IUpdater updater) { Nd4j.getRandom().setSeed(seed); ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.5) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).iterations(1).seed(seed).graphBuilder() .addInputs("in") .addLayer("0", new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1) @@ -171,7 +173,7 @@ public void testOneExecutor() { //Do training 
locally, for 3 minibatches int[] seeds = {1, 2, 3}; - MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, Updater.RMSPROP)); + MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, new RmsProp(0.5))); net.init(); INDArray initialParams = net.params().dup(); @@ -185,7 +187,7 @@ public void testOneExecutor() { //Do training on Spark with one executor, for 3 separate minibatches TrainingMaster tm = getTrainingMaster(1, miniBatchSize, saveUpdater); - SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, Updater.RMSPROP), tm); + SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, new RmsProp(0.5)), tm); sparkNet.setCollectTrainingStats(true); INDArray initialSparkParams = sparkNet.getNetwork().params().dup(); @@ -221,7 +223,7 @@ public void testOneExecutorGraph() { //Do training locally, for 3 minibatches int[] seeds = {1, 2, 3}; - ComputationGraph net = new ComputationGraph(getGraphConf(12345, Updater.RMSPROP)); + ComputationGraph net = new ComputationGraph(getGraphConf(12345, new RmsProp(0.5))); net.init(); INDArray initialParams = net.params().dup(); @@ -236,7 +238,7 @@ public void testOneExecutorGraph() { //Do training on Spark with one executor, for 3 separate minibatches TrainingMaster tm = getTrainingMaster(1, miniBatchSize, saveUpdater); SparkComputationGraph sparkNet = - new SparkComputationGraph(sc, getGraphConf(12345, Updater.RMSPROP), tm); + new SparkComputationGraph(sc, getGraphConf(12345, new RmsProp(0.5)), tm); sparkNet.setCollectTrainingStats(true); INDArray initialSparkParams = sparkNet.getNetwork().params().dup(); @@ -280,7 +282,7 @@ public void testAverageEveryStep() { // CudaGridExecutioner executioner = (CudaGridExecutioner) Nd4j.getExecutioner(); - MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, Updater.SGD)); + MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, new Sgd(0.5))); net.init(); INDArray initialParams = net.params().dup(); // executioner.addToWatchdog(initialParams, "initialParams"); @@ -301,7 +303,7 @@ public void testAverageEveryStep() { .saveUpdater(saveUpdater).workerPrefetchNumBatches(0) // .rddTrainingApproach(RDDTrainingApproach.Direct) .rddTrainingApproach(RDDTrainingApproach.Export).build(); - SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, Updater.SGD), tm); + SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, new Sgd(0.5)), tm); sparkNet.setCollectTrainingStats(true); INDArray initialSparkParams = sparkNet.getNetwork().params().dup(); @@ -357,7 +359,7 @@ public void testAverageEveryStepCNN() { //Do training locally, for 3 minibatches int[] seeds = {1, 2, 3}; - MultiLayerNetwork net = new MultiLayerNetwork(getConfCNN(12345, Updater.SGD)); + MultiLayerNetwork net = new MultiLayerNetwork(getConfCNN(12345, new Sgd(0.5))); net.init(); INDArray initialParams = net.params().dup(); @@ -374,7 +376,7 @@ public void testAverageEveryStepCNN() { .averagingFrequency(1).batchSizePerWorker(miniBatchSizePerWorker) .saveUpdater(saveUpdater).workerPrefetchNumBatches(0) .rddTrainingApproach(RDDTrainingApproach.Export).build(); - SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConfCNN(12345, Updater.SGD), tm); + SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConfCNN(12345, new Sgd(0.5)), tm); sparkNet.setCollectTrainingStats(true); INDArray initialSparkParams = sparkNet.getNetwork().params().dup(); @@ -430,7 +432,7 @@ public void testAverageEveryStepGraph() { // CudaGridExecutioner executioner = 
(CudaGridExecutioner) Nd4j.getExecutioner(); - ComputationGraph net = new ComputationGraph(getGraphConf(12345, Updater.SGD)); + ComputationGraph net = new ComputationGraph(getGraphConf(12345, new Sgd(0.5))); net.init(); INDArray initialParams = net.params().dup(); // executioner.addToWatchdog(initialParams, "initialParams"); @@ -446,7 +448,7 @@ public void testAverageEveryStepGraph() { //Do training on Spark with one executor, for 3 separate minibatches TrainingMaster tm = getTrainingMaster(1, miniBatchSizePerWorker, saveUpdater); - SparkComputationGraph sparkNet = new SparkComputationGraph(sc, getGraphConf(12345, Updater.SGD), tm); + SparkComputationGraph sparkNet = new SparkComputationGraph(sc, getGraphConf(12345, new Sgd(0.5)), tm); sparkNet.setCollectTrainingStats(true); INDArray initialSparkParams = sparkNet.getNetwork().params().dup(); @@ -506,7 +508,7 @@ public void testAverageEveryStepGraphCNN() { //Do training locally, for 3 minibatches int[] seeds = {1, 2, 3}; - ComputationGraph net = new ComputationGraph(getGraphConfCNN(12345, Updater.SGD)); + ComputationGraph net = new ComputationGraph(getGraphConfCNN(12345, new Sgd(0.5))); net.init(); INDArray initialParams = net.params().dup(); @@ -520,7 +522,7 @@ public void testAverageEveryStepGraphCNN() { //Do training on Spark with one executor, for 3 separate minibatches TrainingMaster tm = getTrainingMaster(1, miniBatchSizePerWorker, saveUpdater); - SparkComputationGraph sparkNet = new SparkComputationGraph(sc, getGraphConfCNN(12345, Updater.SGD), tm); + SparkComputationGraph sparkNet = new SparkComputationGraph(sc, getGraphConfCNN(12345, new Sgd(0.5)), tm); sparkNet.setCollectTrainingStats(true); INDArray initialSparkParams = sparkNet.getNetwork().params().dup(); diff --git a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java index f570618a11e4..e34761287d59 100644 --- a/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java +++ b/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java @@ -39,14 +39,12 @@ import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.conf.layers.RBM; import org.deeplearning4j.nn.conf.layers.variational.GaussianReconstructionDistribution; import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder; -import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.listeners.ScoreIterationListener; @@ -65,6 +63,9 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.io.ClassPathResource; +import org.nd4j.linalg.learning.config.IUpdater; +import org.nd4j.linalg.learning.config.Nesterovs; +import org.nd4j.linalg.learning.config.RmsProp; import 
org.nd4j.linalg.lossfunctions.LossFunctions; import scala.Tuple2; @@ -196,14 +197,14 @@ public void testUpdaters() { MultiLayerNetwork netCopy = sparkNet.getNetwork().clone(); netCopy.fit(data); - Updater expectedUpdater = ((BaseLayer) netCopy.conf().getLayer()).getUpdater(); - double expectedLR = ((BaseLayer) netCopy.conf().getLayer()).getLearningRate(); - double expectedMomentum = ((BaseLayer) netCopy.conf().getLayer()).getMomentum(); + IUpdater expectedUpdater = ((BaseLayer) netCopy.conf().getLayer()).getIUpdater(); + double expectedLR = ((Nesterovs)((BaseLayer) netCopy.conf().getLayer()).getIUpdater()).getLearningRate(); + double expectedMomentum = ((Nesterovs)((BaseLayer) netCopy.conf().getLayer()).getIUpdater()).getMomentum(); - Updater actualUpdater = ((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getUpdater(); + IUpdater actualUpdater = ((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getIUpdater(); sparkNet.fit(sparkData); - double actualLR = ((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getLearningRate(); - double actualMomentum = ((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getMomentum(); + double actualLR = ((Nesterovs)((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getIUpdater()).getLearningRate(); + double actualMomentum = ((Nesterovs)((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getIUpdater()).getMomentum(); assertEquals(expectedUpdater, actualUpdater); assertEquals(expectedLR, actualLR, 0.01); @@ -248,7 +249,7 @@ public void testSmallAmountOfData() { //Idea: Test spark training where some executors don't get any data //in this case: by having fewer examples (2 DataSets) than executors (local[*]) - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) @@ -273,8 +274,8 @@ public void testSmallAmountOfData() { @Test public void testDistributedScoring() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l1(0.1).l2(0.1) - .seed(123).updater(Updater.NESTEROVS).learningRate(0.1).momentum(0.9).list() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.1) + .seed(123).updater(new Nesterovs(0.1, 0.9)).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( @@ -358,7 +359,7 @@ public void testParameterAveragingMultipleExamplesPerDataSet() throws Exception list.add(iter.next()); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) @@ -419,7 +420,7 @@ public void testFitViaStringPaths() throws Exception { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) 
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) @@ -485,7 +486,7 @@ public void testFitViaStringPathsSize1() throws Exception { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) @@ -569,7 +570,7 @@ public void testFitViaStringPathsCompGraph() throws Exception { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP) + ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) .graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) @@ -636,7 +637,7 @@ public void testFitViaStringPathsCompGraph() throws Exception { @Test public void testSeedRepeatability() throws Exception { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.RMSPROP) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) .weightInit(WeightInit.XAVIER).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(4) @@ -715,7 +716,7 @@ public void testIterationCounts() throws Exception { list.add(iter.next()); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) @@ -761,7 +762,7 @@ public void testIterationCountsGraph() throws Exception { list.add(iter.next()); } - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP) + ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) .graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) @@ -801,11 +802,11 @@ public void testVaePretrainSimple() { int nIn = 8; Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.RMSPROP) + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new RmsProp()) .weightInit(WeightInit.XAVIER).list() .layer(0, new VariationalAutoencoder.Builder().nIn(8).nOut(10).encoderLayerSizes(12) .decoderLayerSizes(13).reconstructionDistribution( - new GaussianReconstructionDistribution("identity")) + new GaussianReconstructionDistribution(Activation.IDENTITY)) .build()) .pretrain(true).backprop(false).build(); @@ -836,11 +837,11 @@ public void testVaePretrainSimpleCG() { int nIn = 8; Nd4j.getRandom().setSeed(12345); - 
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.RMSPROP) + ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new RmsProp()) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new VariationalAutoencoder.Builder().nIn(8).nOut(10).encoderLayerSizes(12) .decoderLayerSizes(13).reconstructionDistribution( - new GaussianReconstructionDistribution("identity")) + new GaussianReconstructionDistribution(Activation.IDENTITY)) .build(), "in") .setOutputs("0").pretrain(true).backprop(false).build(); diff --git a/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/flow/FlowListenerModule.java b/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/flow/FlowListenerModule.java deleted file mode 100644 index fd4a787d6eb2..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/flow/FlowListenerModule.java +++ /dev/null @@ -1,117 +0,0 @@ -package org.deeplearning4j.ui.module.flow; - -import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.api.storage.Persistable; -import org.deeplearning4j.api.storage.StatsStorage; -import org.deeplearning4j.api.storage.StatsStorageEvent; -import org.deeplearning4j.ui.api.FunctionType; -import org.deeplearning4j.ui.api.HttpMethod; -import org.deeplearning4j.ui.api.Route; -import org.deeplearning4j.ui.api.UIModule; -import org.deeplearning4j.ui.flow.data.FlowStaticPersistable; -import org.deeplearning4j.ui.flow.data.FlowUpdatePersistable; -import play.libs.Json; -import play.mvc.Result; - -import java.util.*; - -import static play.mvc.Results.ok; - -/** - * Module for FlowIterationListener - * - * @author Alex Black - */ -@Slf4j -public class FlowListenerModule implements UIModule { - - private static final String TYPE_ID = "FlowListener"; - - private Map knownSessionIDs = Collections.synchronizedMap(new LinkedHashMap<>()); - - @Override - public List getCallbackTypeIDs() { - return Collections.singletonList(TYPE_ID); - } - - @Override - public List getRoutes() { - Route r1 = new Route("/flow", HttpMethod.GET, FunctionType.Supplier, - () -> ok(org.deeplearning4j.ui.views.html.flow.Flow.apply())); - Route r2 = new Route("/flow/info/:id", HttpMethod.GET, FunctionType.Function, this::getStaticInfo); - Route r3 = new Route("/flow/state/:id", HttpMethod.GET, FunctionType.Function, this::getUpdate); - Route r4 = new Route("/flow/listSessions", HttpMethod.GET, FunctionType.Supplier, this::listSessions); - - return Arrays.asList(r1, r2, r3, r4); - } - - @Override - public void reportStorageEvents(Collection events) { - //We should only be getting relevant session IDs... 
- for (StatsStorageEvent sse : events) { - if (!knownSessionIDs.containsKey(sse.getSessionID())) { - knownSessionIDs.put(sse.getSessionID(), sse.getStatsStorage()); - } - } - } - - @Override - public void onAttach(StatsStorage statsStorage) { - for (String sessionID : statsStorage.listSessionIDs()) { - for (String typeID : statsStorage.listTypeIDsForSession(sessionID)) { - if (!TYPE_ID.equals(typeID)) - continue; - knownSessionIDs.put(sessionID, statsStorage); - } - } - } - - @Override - public void onDetach(StatsStorage statsStorage) { - for (String s : knownSessionIDs.keySet()) { - if (statsStorage == knownSessionIDs.get(s)) { - knownSessionIDs.remove(s); - } - } - } - - private Result listSessions() { - return ok(Json.toJson(knownSessionIDs.keySet())); - } - - private Result getStaticInfo(String sessionID) { - if (!knownSessionIDs.containsKey(sessionID)) - return ok("Unknown session ID"); - StatsStorage ss = knownSessionIDs.get(sessionID); - - List list = ss.getAllStaticInfos(sessionID, TYPE_ID); - if (list == null || list.size() == 0) - return ok(); - - Persistable p = list.get(0); - if (!(p instanceof FlowStaticPersistable)) - return ok(); - - FlowStaticPersistable f = (FlowStaticPersistable) p; - - return ok(Json.toJson(f.getModelInfo())); - } - - private Result getUpdate(String sessionID) { - if (!knownSessionIDs.containsKey(sessionID)) - return ok("Unknown session ID"); - StatsStorage ss = knownSessionIDs.get(sessionID); - - List list = ss.getLatestUpdateAllWorkers(sessionID, TYPE_ID); - if (list == null || list.size() == 0) - return ok(); - - Persistable p = list.get(0); - if (!(p instanceof FlowUpdatePersistable)) - return ok(); - - FlowUpdatePersistable f = (FlowUpdatePersistable) p; - - return ok(Json.toJson(f.getModelState())); - } -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/histogram/HistogramModule.java b/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/histogram/HistogramModule.java deleted file mode 100644 index 009524616158..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/histogram/HistogramModule.java +++ /dev/null @@ -1,207 +0,0 @@ -package org.deeplearning4j.ui.module.histogram; - -import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.api.storage.Persistable; -import org.deeplearning4j.api.storage.StatsStorage; -import org.deeplearning4j.api.storage.StatsStorageEvent; -import org.deeplearning4j.ui.api.FunctionType; -import org.deeplearning4j.ui.api.HttpMethod; -import org.deeplearning4j.ui.api.Route; -import org.deeplearning4j.ui.api.UIModule; -import org.deeplearning4j.ui.stats.StatsListener; -import org.deeplearning4j.ui.stats.api.StatsInitializationReport; -import org.deeplearning4j.ui.stats.api.StatsReport; -import org.deeplearning4j.ui.stats.api.StatsType; -import org.deeplearning4j.ui.stats.api.SummaryType; -import org.deeplearning4j.ui.weights.beans.CompactModelAndGradient; -import play.libs.Json; -import play.mvc.Result; -import play.mvc.Results; - -import java.util.*; - -import static play.mvc.Results.ok; - -/** - * Module for the HistogramIterationListener - * - * @author Alex Black - */ -@Slf4j -public class HistogramModule implements UIModule { - - private Map knownSessionIDs = Collections.synchronizedMap(new LinkedHashMap<>()); - - - @Override - public List getCallbackTypeIDs() { - return Collections.singletonList(StatsListener.TYPE_ID); - } - - @Override - public List getRoutes() { - Route r = 
new Route("/weights", HttpMethod.GET, FunctionType.Supplier, - () -> ok(org.deeplearning4j.ui.views.html.histogram.Histogram.apply())); - Route r2 = new Route("/weights/listSessions", HttpMethod.GET, FunctionType.Supplier, - () -> ok(Json.toJson(knownSessionIDs.keySet()))); - Route r3 = new Route("/weights/updated/:sid", HttpMethod.GET, FunctionType.Function, this::getLastUpdateTime); - Route r4 = new Route("/weights/data/:sid", HttpMethod.GET, FunctionType.Function, this::processRequest); - - return Arrays.asList(r, r2, r3, r4); - } - - @Override - public void reportStorageEvents(Collection events) { - log.trace("Received events: {}", events); - - //We should only be getting relevant session IDs... - for (StatsStorageEvent sse : events) { - if (!knownSessionIDs.containsKey(sse.getSessionID())) { - knownSessionIDs.put(sse.getSessionID(), sse.getStatsStorage()); - } - } - } - - @Override - public void onAttach(StatsStorage statsStorage) { - for (String sessionID : statsStorage.listSessionIDs()) { - for (String typeID : statsStorage.listTypeIDsForSession(sessionID)) { - if (!StatsListener.TYPE_ID.equals(typeID)) - continue; - knownSessionIDs.put(sessionID, statsStorage); - } - } - } - - @Override - public void onDetach(StatsStorage statsStorage) { - for (String sessionID : statsStorage.listSessionIDs()) { - knownSessionIDs.remove(sessionID); - } - } - - private Result getLastUpdateTime(String sessionID) { - return Results.ok(Json.toJson(System.currentTimeMillis())); - } - - private Result processRequest(String sessionId) { - //TODO cache the relevant info and update, rather than querying StatsStorage and building from scratch each time - - StatsStorage ss = knownSessionIDs.get(sessionId); - if (ss == null) { - return Results.notFound("Unknown session ID: " + sessionId); - } - - List workerIDs = ss.listWorkerIDsForSession(sessionId); - - //TODO checks - StatsInitializationReport initReport = (StatsInitializationReport) ss.getStaticInfo(sessionId, - StatsListener.TYPE_ID, workerIDs.get(0)); - if (initReport == null) - return Results.ok(Json.toJson(Collections.EMPTY_MAP)); - - String[] paramNames = initReport.getModelParamNames(); - //Infer layer names from param names... - Set layerNameSet = new LinkedHashSet<>(); - for (String s : paramNames) { - String[] split = s.split("_"); - if (!layerNameSet.contains(split[0])) { - layerNameSet.add(split[0]); - } - } - List layerNameList = new ArrayList<>(layerNameSet); - - List list = ss.getAllUpdatesAfter(sessionId, StatsListener.TYPE_ID, workerIDs.get(0), 0); - Collections.sort(list, (a, b) -> Long.compare(a.getTimeStamp(), b.getTimeStamp())); - - List scoreList = new ArrayList<>(list.size()); - List>> meanMagHistoryParams = new ArrayList<>(); //List.get(i) -> layer i. Maps: parameter for the given layer - List>> meanMagHistoryUpdates = new ArrayList<>(); //List.get(i) -> layer i. 
Maps: updates for the given layer - for (int i = 0; i < layerNameList.size(); i++) { - meanMagHistoryParams.add(new HashMap<>()); - meanMagHistoryUpdates.add(new HashMap<>()); - } - - StatsReport last = null; - for (Persistable p : list) { - if (!(p instanceof StatsReport)) { - log.debug("Encountered unexpected type: {}", p); - continue; - } - StatsReport sp = (StatsReport) p; - scoreList.add(sp.getScore()); - - //Mean magnitudes - if (sp.hasSummaryStats(StatsType.Parameters, SummaryType.MeanMagnitudes)) { - updateMeanMagnitudeMaps(sp.getMeanMagnitudes(StatsType.Parameters), layerNameList, - meanMagHistoryParams); - } - - if (sp.hasSummaryStats(StatsType.Updates, SummaryType.MeanMagnitudes)) { - updateMeanMagnitudeMaps(sp.getMeanMagnitudes(StatsType.Updates), layerNameList, meanMagHistoryUpdates); - } - - last = sp; - } - - Map newParams = getHistogram(last.getHistograms(StatsType.Parameters)); - Map newGrad = getHistogram(last.getHistograms(StatsType.Updates)); - - double lastScore = (scoreList.size() == 0 ? 0.0 : scoreList.get(scoreList.size() - 1)); - - CompactModelAndGradient g = new CompactModelAndGradient(); - g.setGradients(newGrad); - g.setParameters(newParams); - g.setScore(lastScore); - g.setScores(scoreList); - // g.setPath(subPath); - g.setUpdateMagnitudes(meanMagHistoryUpdates); - g.setParamMagnitudes(meanMagHistoryParams); - // g.setLayerNames(layerNames); - g.setLastUpdateTime(last.getTimeStamp()); - - return Results.ok(Json.toJson(g)); - } - - private void updateMeanMagnitudeMaps(Map current, List layerNames, - List>> history) { - for (Map.Entry entry : current.entrySet()) { - String key = entry.getKey(); - String[] split = key.split("_"); - int idx = layerNames.indexOf(split[0]); - Map> map = history.get(idx); - List l = map.get(key); - if (l == null) { - l = new ArrayList<>(); - map.put(key, l); - } - l.add(entry.getValue()); - } - } - - private Map getHistogram(Map histograms) { - Map ret = new LinkedHashMap<>(); - for (String s : histograms.keySet()) { - org.deeplearning4j.ui.stats.api.Histogram h = histograms.get(s); - String newName; - if (Character.isDigit(s.charAt(0))) - newName = "param_" + s; - else - newName = s; - - Map temp = new LinkedHashMap<>(); - double min = h.getMin(); - double max = h.getMax(); - int n = h.getNBins(); - double step = (max - min) / n; - int[] counts = h.getBinCounts(); - for (int i = 0; i < n; i++) { - double binLoc = min + i * step + step / 2.0; - temp.put(binLoc, counts[i]); - } - - ret.put(newName, temp); - } - return ret; - } -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java b/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java index fe4996f5b637..1cd6a10c30a4 100644 --- a/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java +++ b/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java @@ -7,9 +7,6 @@ import org.deeplearning4j.api.storage.StatsStorage; import org.deeplearning4j.api.storage.StatsStorageEvent; import org.deeplearning4j.api.storage.StatsStorageListener; -import org.eclipse.collections.impl.list.mutable.primitive.LongArrayList; -import org.nd4j.linalg.primitives.Pair; -import org.nd4j.linalg.primitives.Triple; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import 
org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -28,7 +25,10 @@ import org.deeplearning4j.ui.views.html.training.TrainingModel; import org.deeplearning4j.ui.views.html.training.TrainingOverview; import org.deeplearning4j.ui.views.html.training.TrainingSystem; +import org.eclipse.collections.impl.list.mutable.primitive.LongArrayList; import org.nd4j.linalg.learning.config.IUpdater; +import org.nd4j.linalg.primitives.Pair; +import org.nd4j.linalg.primitives.Triple; import play.libs.Json; import play.mvc.Result; import play.mvc.Results; diff --git a/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/play/PlayUIServer.java b/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/play/PlayUIServer.java index 5416ade8b23c..0212bba6df75 100644 --- a/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/play/PlayUIServer.java +++ b/deeplearning4j-ui-parent/deeplearning4j-play/src/main/java/org/deeplearning4j/ui/play/PlayUIServer.java @@ -10,15 +10,12 @@ import org.deeplearning4j.api.storage.StatsStorageEvent; import org.deeplearning4j.api.storage.StatsStorageListener; import org.deeplearning4j.api.storage.StatsStorageRouter; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.ui.api.Route; import org.deeplearning4j.ui.api.UIModule; import org.deeplearning4j.ui.api.UIServer; import org.deeplearning4j.ui.i18n.I18NProvider; import org.deeplearning4j.ui.module.convolutional.ConvolutionalListenerModule; import org.deeplearning4j.ui.module.defaultModule.DefaultModule; -import org.deeplearning4j.ui.module.flow.FlowListenerModule; -import org.deeplearning4j.ui.module.histogram.HistogramModule; import org.deeplearning4j.ui.module.remote.RemoteReceiverModule; import org.deeplearning4j.ui.module.train.TrainModule; import org.deeplearning4j.ui.module.tsne.TsneModule; @@ -27,6 +24,7 @@ import org.deeplearning4j.ui.play.staticroutes.I18NRoute; import org.deeplearning4j.ui.storage.InMemoryStatsStorage; import org.deeplearning4j.ui.storage.impl.QueueStatsStorageListener; +import org.nd4j.linalg.primitives.Pair; import org.reflections.ReflectionUtils; import org.reflections.Reflections; import play.Mode; @@ -123,10 +121,8 @@ public void runMain(String[] args) { routingDsl.GET("/assets/*file").routeTo(FunctionUtil.function(new Assets(ASSETS_ROOT_DIRECTORY))); uiModules.add(new DefaultModule()); //For: navigation page "/" - uiModules.add(new HistogramModule()); uiModules.add(new TrainModule()); uiModules.add(new ConvolutionalListenerModule()); - uiModules.add(new FlowListenerModule()); uiModules.add(new TsneModule()); remoteReceiverModule = new RemoteReceiverModule(); uiModules.add(remoteReceiverModule); diff --git a/deeplearning4j-ui-parent/deeplearning4j-play/src/test/java/org/deeplearning4j/ui/play/TestPlayUI.java b/deeplearning4j-ui-parent/deeplearning4j-play/src/test/java/org/deeplearning4j/ui/play/TestPlayUI.java index 95db70189a82..5de4bfcead02 100644 --- a/deeplearning4j-ui-parent/deeplearning4j-play/src/test/java/org/deeplearning4j/ui/play/TestPlayUI.java +++ b/deeplearning4j-ui-parent/deeplearning4j-play/src/test/java/org/deeplearning4j/ui/play/TestPlayUI.java @@ -6,7 +6,6 @@ import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; 
import org.deeplearning4j.nn.conf.layers.RBM; @@ -23,6 +22,7 @@ import org.junit.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import static org.junit.Assert.assertEquals; @@ -87,19 +87,19 @@ public void testUI_VAE() throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .learningRate(1e-5) + .updater(new Sgd(1e-5)) .list().layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(10, 11) .decoderLayerSizes(12, 13).weightInit(WeightInit.XAVIER) - .pzxActivationFunction("identity") + .pzxActivationFunction(Activation.IDENTITY) .reconstructionDistribution( new GaussianReconstructionDistribution()) - .activation(Activation.LEAKYRELU).updater(Updater.SGD).build()) + .activation(Activation.LEAKYRELU).build()) .layer(1, new VariationalAutoencoder.Builder().nIn(3).nOut(3).encoderLayerSizes(7) .decoderLayerSizes(8).weightInit(WeightInit.XAVIER) - .pzxActivationFunction("identity") + .pzxActivationFunction(Activation.IDENTITY) .reconstructionDistribution(new GaussianReconstructionDistribution()) - .activation(Activation.LEAKYRELU).updater(Updater.SGD).build()) + .activation(Activation.LEAKYRELU).build()) .layer(2, new OutputLayer.Builder().nIn(3).nOut(3).build()).pretrain(true).backprop(true) .build(); @@ -130,7 +130,7 @@ public void testUI_RBM() throws Exception { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .learningRate(1e-5).list().layer(0, new RBM.Builder().nIn(4).nOut(3).build()) + .updater(new Sgd(0.1)).list().layer(0, new RBM.Builder().nIn(4).nOut(3).build()) .layer(1, new RBM.Builder().nIn(3).nOut(3).build()) .layer(2, new OutputLayer.Builder().nIn(3).nOut(3).build()).pretrain(true).backprop(true) .build(); diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/Coords.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/Coords.java deleted file mode 100644 index ae90fc6b89cd..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/Coords.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.deeplearning4j.ui.flow.beans; - -import lombok.Data; - -import java.io.Serializable; - -/** - * @author raver119@gmail.com - */ -@Data -public class Coords implements Serializable { - private int x; - private int y; - - public Coords() { - - } - - public Coords(int x, int y) { - this.x = x; - this.y = y; - } - - public static Coords makeCoors(int x, int y) { - return new Coords(x, y); - } -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/Description.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/Description.java deleted file mode 100644 index 6623943c67da..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/Description.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.deeplearning4j.ui.flow.beans; - -import lombok.Data; - -import java.io.Serializable; - -/** - * Description bean holds few lines worth text description for any layer - * - * @author raver119@gmail.com - 
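The TestPlayUI hunks above swap the removed .learningRate(double) / Updater.SGD configuration for an explicit IUpdater instance (new Sgd(lr)) and replace string activation names with the Activation enum. A minimal sketch of the newer configuration style, assuming the same builder API used in these tests (the layer sizes and learning rate below are illustrative, not taken from this patch):

    import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.learning.config.Sgd;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(new Sgd(1e-5))                    // learning rate now lives on the IUpdater instance
            .list()
            .layer(0, new DenseLayer.Builder().nIn(4).nOut(3)
                    .activation(Activation.LEAKYRELU)  // Activation enum instead of a String name
                    .build())
            .layer(1, new OutputLayer.Builder().nIn(3).nOut(3).build())
            .build();

With the rate carried by the global Sgd updater, the per-layer .updater(Updater.SGD) overrides in the old test code are simply dropped.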
*/ -@Data -public class Description implements Serializable { - private final static long serialVersionUID = 119L; - private String mainLine = ""; - private String subLine = ""; - private String text = ""; -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/LayerInfo.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/LayerInfo.java deleted file mode 100644 index bfd64bcacd8c..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/LayerInfo.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.deeplearning4j.ui.flow.beans; - -import lombok.Data; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; - -/** - * This bean describes abstract layer and it's connections - * - * @author raver119@gmail.com - */ -@Data -public class LayerInfo implements Serializable { - private final static long serialVersionUID = 119L; - private long id; - private String name; - private String layerType; - - private String color; - - // grid coordinates. row & column - private int x = 0; - private int y = 0; - - private Description description; - - // set of connections as grid coordinates - private List connections = new ArrayList<>(); - - public void addConnection(LayerInfo layerInfo) { - if (!connections.contains(Coords.makeCoors(layerInfo.getX(), layerInfo.getY()))) { - connections.add(Coords.makeCoors(layerInfo.getX(), layerInfo.getY())); - } - } - - public void addConnection(int x, int y) { - if (!connections.contains(Coords.makeCoors(x, y))) - connections.add(Coords.makeCoors(x, y)); - } - - public void dropConnection(int x, int y) { - connections.remove(Coords.makeCoors(x, y)); - } - - public void dropConnections() { - connections.clear(); - } -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/LayerParams.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/LayerParams.java deleted file mode 100644 index 7d9847dca4cf..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/LayerParams.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.deeplearning4j.ui.flow.beans; - -import lombok.Data; - -import java.io.Serializable; -import java.util.Map; - -/** - * - * @author raver119@gmail.com - */ -@Data -public class LayerParams implements Serializable { - private Map W; - private Map RW; - private Map RWF; - private Map B; -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/ModelInfo.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/ModelInfo.java deleted file mode 100644 index fd004d7a31f8..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/ModelInfo.java +++ /dev/null @@ -1,101 +0,0 @@ -package org.deeplearning4j.ui.flow.beans; - -import lombok.Data; -import lombok.NonNull; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Set; - -/** - * This bean works as holder for unbounded list of layers. Each layer has it's own place in model's virtual coordinate space. 
- * For now, coordinate space is limited to 2 dimensions - * - * @author raver119@gmail.com - */ -@Data -public class ModelInfo implements Serializable { - private final static long serialVersionUID = 119L; - private long time = System.currentTimeMillis(); - private transient int counter = 0; - - // PLEASE NOTE: Inverted coords here -> Y, X LayerInfo - //private Table layers = HashBasedTable.create(); - // private Map, LayerInfo> layers = new LinkedHashMap<>(); - private List layers = new ArrayList<>(); - - /** - * This method maps given layer into model coordinate space - * @param layer - */ - public synchronized void addLayer(@NonNull LayerInfo layer) { - if (!layers.contains(layer)) { - layer.setId(counter); - this.layers.add(layer); - counter++; - } - } - - /** - * This method returns LayerInfo for specified layer name - * @param name - * @return - */ - public LayerInfo getLayerInfoByName(String name) { - for (LayerInfo layerInfo : layers) { - if (layerInfo.getName().equalsIgnoreCase(name)) - return layerInfo; - } - return null; - } - - /** - * This method returns LayerInfo for specified grid coordinates - * @param x - * @param y - * @return - */ - public LayerInfo getLayerInfoByCoords(int x, int y) { - for (LayerInfo layerInfo : layers) { - if (layerInfo.getX() == x && layerInfo.getY() == y) - return layerInfo; - } - - return null; - } - - /** - * This method returns the total number of nodes within described model - * - * @return number of elements - */ - public int size() { - return layers.size(); - } - - /** - * This method returns all LayerTypes used in this model - * @return - */ - public Set getLayerTypes() { - Set set = new LinkedHashSet<>(); - - for (LayerInfo layerInfo : layers) { - set.add(layerInfo.getLayerType()); - } - return set; - } - - public Set getLayersByType(String layerType) { - Set set = new LinkedHashSet<>(); - - for (LayerInfo layerInfo : layers) { - if (layerInfo.getLayerType().equals(layerType)) - set.add(layerInfo); - } - - return set; - } -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/ModelState.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/ModelState.java deleted file mode 100644 index d25a41afa391..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/ModelState.java +++ /dev/null @@ -1,63 +0,0 @@ -package org.deeplearning4j.ui.flow.beans; - -import lombok.Data; - -import java.io.Serializable; -import java.util.*; - -/** - * @author raver119@gmail.com - */ -@Data -public class ModelState implements Serializable { - private float score = 0.0f; - private String trainingTime; - private List scores = new ArrayList<>(); - private float performanceBatches; - private float performanceSamples; - private long iterationTime; - - private Map parameters = new HashMap<>(); - private Map gradients = new HashMap<>(); - private List learningRates = new ArrayList<>(); - - private Map layerParams = new LinkedHashMap<>(); - - //private List performanceBatches = new ArrayList<>(); - //private List performanceSamples = new ArrayList<>(); - - public ModelState() { - - } - - public void addScore(float score) { - if (scores.size() > 1000) - scores.remove(0); - - scores.add(score); - } - - - public void addPerformanceBatches(float perf) { - performanceBatches = perf; - } - - public void addPerformanceSamples(float perf) { - performanceSamples = perf; - } - /* - public void addPerformanceBatches(float 
perf) { - if (performanceBatches.size() > 100) - performanceBatches.remove(0); - - performanceBatches.add(perf); - } - - public void addPerformanceSamples(float perf) { - if (performanceSamples.size() > 100) - performanceSamples.remove(0); - - performanceSamples.add(perf); - } - */ -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/NodeReport.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/NodeReport.java deleted file mode 100644 index 32e5509f53f6..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/beans/NodeReport.java +++ /dev/null @@ -1,20 +0,0 @@ -package org.deeplearning4j.ui.flow.beans; - -import lombok.Data; - -import java.io.Serializable; - -/** - * Unified node state report: weights/gradients as distribution - * - * @author raver119@gmail.com - */ -@Data -public class NodeReport implements Serializable { - private final static long serialVersionUID = 119L; - /* - TODO: to be implemented - - Basic idea: categorized distribution for weights/gradients built from INDArray, suitable for concurrent generation - */ -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/data/FlowStaticPersistable.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/data/FlowStaticPersistable.java deleted file mode 100644 index dac0a8e3cd85..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/data/FlowStaticPersistable.java +++ /dev/null @@ -1,99 +0,0 @@ -package org.deeplearning4j.ui.flow.data; - -import lombok.AllArgsConstructor; -import lombok.Data; -import lombok.NoArgsConstructor; -import org.apache.commons.compress.utils.IOUtils; -import org.deeplearning4j.api.storage.Persistable; -import org.deeplearning4j.ui.flow.beans.ModelInfo; - -import java.io.*; -import java.nio.ByteBuffer; - -/** - * Created by Alex on 25/10/2016. - */ -@AllArgsConstructor -@NoArgsConstructor -@Data -public class FlowStaticPersistable implements Persistable { - - private String sessionID; - private String workerID; - private long timestamp; - private ModelInfo modelInfo; - - @Override - public String getSessionID() { - return sessionID; - } - - @Override - public String getTypeID() { - return FlowUpdatePersistable.TYPE_ID; - } - - @Override - public String getWorkerID() { - return workerID; - } - - @Override - public long getTimeStamp() { - return timestamp; - } - - @Override - public int encodingLengthBytes() { - return 0; - } - - @Override - public byte[] encode() { - //Not the most efficient: but it's easy to implement... 
- ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (ObjectOutputStream oos = new ObjectOutputStream(baos)) { - oos.writeObject(this); - } catch (IOException e) { - throw new RuntimeException(e); //Shouldn't normally happen - } - - return baos.toByteArray(); - } - - @Override - public void encode(ByteBuffer buffer) { - buffer.put(encode()); - } - - @Override - public void encode(OutputStream outputStream) throws IOException { - outputStream.write(encode()); - } - - @Override - public void decode(byte[] decode) { - try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(decode))) { - FlowStaticPersistable p = (FlowStaticPersistable) ois.readObject(); - this.sessionID = p.sessionID; - this.workerID = p.workerID; - this.timestamp = p.getTimeStamp(); - this.modelInfo = p.modelInfo; - } catch (IOException | ClassNotFoundException e) { - throw new RuntimeException(e); //Shouldn't normally happen - } - } - - @Override - public void decode(ByteBuffer buffer) { - byte[] arr = new byte[buffer.remaining()]; - buffer.get(arr); - decode(arr); - } - - @Override - public void decode(InputStream inputStream) throws IOException { - byte[] b = IOUtils.toByteArray(inputStream); - decode(b); - } -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/data/FlowUpdatePersistable.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/data/FlowUpdatePersistable.java deleted file mode 100644 index 5d43e13a91be..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/flow/data/FlowUpdatePersistable.java +++ /dev/null @@ -1,101 +0,0 @@ -package org.deeplearning4j.ui.flow.data; - -import lombok.AllArgsConstructor; -import lombok.Data; -import lombok.NoArgsConstructor; -import org.apache.commons.compress.utils.IOUtils; -import org.deeplearning4j.api.storage.Persistable; -import org.deeplearning4j.ui.flow.beans.ModelState; - -import java.io.*; -import java.nio.ByteBuffer; - -/** - * Created by Alex on 25/10/2016. - */ -@AllArgsConstructor -@NoArgsConstructor -@Data -public class FlowUpdatePersistable implements Persistable { - - public static final String TYPE_ID = "FlowListener"; - - private String sessionID; - private String workerID; - private long timestamp; - private ModelState modelState; - - @Override - public String getSessionID() { - return sessionID; - } - - @Override - public String getTypeID() { - return TYPE_ID; - } - - @Override - public String getWorkerID() { - return workerID; - } - - @Override - public long getTimeStamp() { - return timestamp; - } - - @Override - public int encodingLengthBytes() { - return 0; - } - - @Override - public byte[] encode() { - //Not the most efficient: but it's easy to implement... 
- ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (ObjectOutputStream oos = new ObjectOutputStream(baos)) { - oos.writeObject(this); - } catch (IOException e) { - throw new RuntimeException(e); //Shouldn't normally happen - } - - return baos.toByteArray(); - } - - @Override - public void encode(ByteBuffer buffer) { - buffer.put(encode()); - } - - @Override - public void encode(OutputStream outputStream) throws IOException { - outputStream.write(encode()); - } - - @Override - public void decode(byte[] decode) { - try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(decode))) { - FlowUpdatePersistable p = (FlowUpdatePersistable) ois.readObject(); - this.sessionID = p.sessionID; - this.workerID = p.workerID; - this.timestamp = p.getTimeStamp(); - this.modelState = p.modelState; - } catch (IOException | ClassNotFoundException e) { - throw new RuntimeException(e); //Shouldn't normally happen - } - } - - @Override - public void decode(ByteBuffer buffer) { - byte[] arr = new byte[buffer.remaining()]; - buffer.get(arr); - decode(arr); - } - - @Override - public void decode(InputStream inputStream) throws IOException { - byte[] b = IOUtils.toByteArray(inputStream); - decode(b); - } -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/BaseStatsListener.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/BaseStatsListener.java index f84c69c2e06a..6a2617cff136 100644 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/BaseStatsListener.java +++ b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/BaseStatsListener.java @@ -6,7 +6,6 @@ import org.deeplearning4j.api.storage.StatsStorageRouter; import org.deeplearning4j.api.storage.StorageMetaData; import org.deeplearning4j.api.storage.listener.RoutingIterationListener; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -21,6 +20,7 @@ import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.primitives.Pair; import org.nd4j.nativeblas.NativeOps; import org.nd4j.nativeblas.NativeOpsHolder; @@ -412,7 +412,7 @@ public void iterationDone(Model model, int iteration) { int layerIdx = 0; for (Layer l : ((MultiLayerNetwork) model).getLayers()) { NeuralNetConfiguration conf = l.conf(); - Map layerLrs = conf.getLearningRateByParam(); + Map layerLrs = null; //TODO conf.getLearningRateByParam(); Set backpropParams = l.paramTable(true).keySet(); for (Map.Entry entry : layerLrs.entrySet()) { if (!backpropParams.contains(entry.getKey())) @@ -425,7 +425,7 @@ public void iterationDone(Model model, int iteration) { for (Layer l : ((ComputationGraph) model).getLayers()) { //Need to append layer name NeuralNetConfiguration conf = l.conf(); - Map layerLrs = conf.getLearningRateByParam(); + Map layerLrs = null; //TODO conf.getLearningRateByParam(); String layerName = conf.getLayer().getLayerName(); Set backpropParams = l.paramTable(true).keySet(); for (Map.Entry entry : layerLrs.entrySet()) { @@ -436,7 +436,7 @@ public void iterationDone(Model model, int iteration) { } } else if (model instanceof Layer) { Layer l = (Layer) model; - Map map = l.conf().getLearningRateByParam(); + Map map = null; //TODO 
l.conf().getLearningRateByParam(); lrs.putAll(map); } report.reportLearningRates(lrs); diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/api/StatsReport.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/api/StatsReport.java index f34ffe6133bd..62d1c5c3b421 100644 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/api/StatsReport.java +++ b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/api/StatsReport.java @@ -1,8 +1,8 @@ package org.deeplearning4j.ui.stats.api; import org.deeplearning4j.api.storage.Persistable; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.ui.stats.StatsListener; +import org.nd4j.linalg.primitives.Pair; import java.io.Serializable; import java.util.List; diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/impl/SbeStatsReport.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/impl/SbeStatsReport.java index c7817a15021c..2fca7e73084f 100644 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/impl/SbeStatsReport.java +++ b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/impl/SbeStatsReport.java @@ -8,13 +8,13 @@ import org.agrona.MutableDirectBuffer; import org.agrona.concurrent.UnsafeBuffer; import org.apache.commons.io.IOUtils; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.ui.stats.api.Histogram; import org.deeplearning4j.ui.stats.api.StatsReport; import org.deeplearning4j.ui.stats.api.StatsType; import org.deeplearning4j.ui.stats.api.SummaryType; import org.deeplearning4j.ui.stats.sbe.*; import org.deeplearning4j.ui.storage.AgronaPersistable; +import org.nd4j.linalg.primitives.Pair; import java.io.*; import java.nio.ByteBuffer; diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/impl/java/JavaStatsReport.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/impl/java/JavaStatsReport.java index 15d17a11875b..4572777cb8f1 100644 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/impl/java/JavaStatsReport.java +++ b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/impl/java/JavaStatsReport.java @@ -5,11 +5,11 @@ import lombok.EqualsAndHashCode; import lombok.ToString; import org.apache.commons.compress.utils.IOUtils; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.ui.stats.api.Histogram; import org.deeplearning4j.ui.stats.api.StatsReport; import org.deeplearning4j.ui.stats.api.StatsType; import org.deeplearning4j.ui.stats.api.SummaryType; +import org.nd4j.linalg.primitives.Pair; import java.io.*; import java.lang.reflect.Field; diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsClasses.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsClasses.java index d3a16e918c8c..34009a38dc11 100644 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsClasses.java +++ b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsClasses.java @@ -1,12 +1,12 @@ package 
org.deeplearning4j.ui.stats; -import org.nd4j.linalg.primitives.Pair; import org.deeplearning4j.ui.stats.api.*; import org.deeplearning4j.ui.stats.impl.SbeStatsInitializationReport; import org.deeplearning4j.ui.stats.impl.SbeStatsReport; import org.deeplearning4j.ui.stats.impl.java.JavaStatsInitializationReport; import org.junit.Assert; import org.junit.Test; +import org.nd4j.linalg.primitives.Pair; import java.io.*; import java.util.ArrayList; diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java index 9bcf911eb56b..f56337e80e7d 100644 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java +++ b/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java @@ -11,6 +11,7 @@ import org.junit.Test; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.learning.config.Sgd; import java.io.File; import java.io.IOException; @@ -35,7 +36,7 @@ public void test() throws IOException { MultiLayerNetwork net2 = new TransferLearning.Builder(net) .fineTuneConfiguration( - new FineTuneConfiguration.Builder().learningRate(0.01).build()) + new FineTuneConfiguration.Builder().updater(new Sgd(0.01)).build()) .setFeatureExtractor(0).build(); File f = Files.createTempFile("dl4jTestTransferStatsCollection", "bin").toFile(); diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-remote-iterationlisteners/src/main/java/org/deeplearning4j/ui/flow/RemoteFlowIterationListener.java b/deeplearning4j-ui-parent/deeplearning4j-ui-remote-iterationlisteners/src/main/java/org/deeplearning4j/ui/flow/RemoteFlowIterationListener.java deleted file mode 100644 index 5b9b52834279..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-remote-iterationlisteners/src/main/java/org/deeplearning4j/ui/flow/RemoteFlowIterationListener.java +++ /dev/null @@ -1,542 +0,0 @@ -package org.deeplearning4j.ui.flow; - -import lombok.NonNull; -import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Model; -import org.deeplearning4j.nn.conf.layers.BaseLayer; -import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; -import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; -import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; -import org.deeplearning4j.nn.graph.ComputationGraph; -import org.deeplearning4j.nn.graph.vertex.GraphVertex; -import org.deeplearning4j.nn.graph.vertex.VertexIndices; -import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.ui.UiConnectionInfo; -import org.deeplearning4j.ui.flow.beans.*; -import org.deeplearning4j.ui.weights.HistogramBin; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.shape.Shape; -import org.nd4j.linalg.util.ArrayUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.ws.rs.client.Client; -import javax.ws.rs.client.ClientBuilder; -import javax.ws.rs.client.Entity; -import javax.ws.rs.client.WebTarget; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; -import java.util.*; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; - -/** - * This 
IterationListener is suited for general model performance/architecture overview - * - * PLEASE NOTE: WORK IN PROGRESS, DO NOT USE IT UNLESS YOU HAVE TO - * - * @author raver119@gmail.com - */ -public class RemoteFlowIterationListener implements IterationListener { - private static final String FORMAT = "%02d:%02d:%02d"; - public static final String LOCALHOST = "localhost"; - public static final String INPUT = "INPUT"; - // TODO: basic auth should be considered here as well - private String remoteAddr; - private int remotePort; - private String login; - private String password; - private int frequency = 1; - private boolean firstIteration = true; - private String path; - private UiConnectionInfo connectionInfo; - private ModelState modelState = new ModelState(); - - private AtomicLong iterationCount = new AtomicLong(0); - - - - private long lastTime = System.currentTimeMillis(); - private long currTime; - private long initTime = System.currentTimeMillis(); - - private static final List colors = Collections.unmodifiableList( - Arrays.asList("#9966ff", "#ff9933", "#ffff99", "#3366ff", "#0099cc", "#669999", "#66ffff")); - - private Client client = ClientBuilder.newClient(); - private WebTarget target; - private WebTarget targetState; - - private static Logger log = LoggerFactory.getLogger(RemoteFlowIterationListener.class); - - /** - * Creates IterationListener and keeps it detached from any UiServer instances - */ - protected RemoteFlowIterationListener() { - // please keep this constructor protected - } - - - - public RemoteFlowIterationListener(@NonNull UiConnectionInfo connectionInfo, int frequency) { - setup(connectionInfo); - this.frequency = frequency; - } - - private void setup(@NonNull UiConnectionInfo connectionInfo) { - // TODO: add auth option - - this.connectionInfo = connectionInfo; - - java.util.logging.Logger logger = java.util.logging.Logger.getGlobal(); - login = null; - password = null; - // client.register(new LoggingFilter(logger, true)); - if (login == null || password == null) - target = client.target(connectionInfo.getFirstPart()).path(connectionInfo.getSecondPart("flow")) - .path("info").queryParam("sid", connectionInfo.getSessionId()); - - targetState = client.target(connectionInfo.getFirstPart()).path(connectionInfo.getSecondPart("flow")) - .path("state").queryParam("sid", connectionInfo.getSessionId()); - this.path = connectionInfo.getFullAddress("flow"); - - - log.info("Flow UI address: " + this.path); - } - - /** - * Get if listener invoked - */ - @Override - public boolean invoked() { - return false; - } - - /** - * Change invoke to true - */ - @Override - public void invoke() { - - } - - /** - * Event listener for each iteration - * - * @param model the model iterating - * @param iteration the iteration - */ - @Override - public synchronized void iterationDone(Model model, int iteration) { - if (iterationCount.incrementAndGet() % frequency == 0) { - currTime = System.currentTimeMillis(); - /* - Basic plan: - 1. We should detect, if that's CompGraph or MultilayerNetwork. However the actual difference will be limited to number of non-linear connections. - 2. Network structure should be converted to JSON - 3. Params for each node should be packed to JSON as well - 4. For specific cases (like CNN) binary data should be wrapped into base64 - 5. For arrays/params gzip could be used (to be investigated) - ...... - Later, on client side, this JSON should be parsed and rendered. So, proper object structure to be considered. 
- */ - - // update modelState - buildModelState(model); - - // On first pass we just build list of layers. However, for MultiLayerNetwork first pass is the last pass, since we know connections in advance - ModelInfo info = buildModelInfo(model); - - - - /* - as soon as model info is built, we need to define color scheme based on number of unique nodes - */ - - // send ModelInfo to UiServer - Response resp = target.request(MediaType.APPLICATION_JSON).accept(MediaType.APPLICATION_JSON) - .post(Entity.entity(info, MediaType.APPLICATION_JSON)); - log.debug("Response: " + resp); - - // send ModelState to UiServer - resp = targetState.request(MediaType.APPLICATION_JSON).accept(MediaType.APPLICATION_JSON) - .post(Entity.entity(modelState, MediaType.APPLICATION_JSON)); - log.debug("Response: " + resp); - /* - TODO: it would be nice to send updates of nodes as well - */ - - if (firstIteration) { - firstIteration = false; - } - } - - lastTime = System.currentTimeMillis(); - } - - /** - * This method returns all Layers connected to the currentInput - * - * @param vertices - * @param currentInput - * @param currentY - * @return - */ - protected List flattenToY(ModelInfo model, GraphVertex[] vertices, List currentInput, - int currentY) { - List results = new ArrayList<>(); - int x = 0; - for (int v = 0; v < vertices.length; v++) { - GraphVertex vertex = vertices[v]; - VertexIndices[] indices = vertex.getInputVertices(); - - if (indices != null) - for (int i = 0; i < indices.length; i++) { - GraphVertex cv = vertices[indices[i].getVertexIndex()]; - String inputName = cv.getVertexName(); - - for (String input : currentInput) { - if (inputName.equals(input)) { - // we have match for Vertex - // log.info("Vertex: " + vertex.getVertexName() + " has Input: " + input); - try { - LayerInfo info = model.getLayerInfoByName(vertex.getVertexName()); - if (info == null) - info = getLayerInfo(vertex.getLayer(), x, currentY, 121); - info.setName(vertex.getVertexName()); - - // special case here: vertex isn't a layer - if (vertex.getLayer() == null) { - info.setLayerType(vertex.getClass().getSimpleName()); - } - if (info.getName().endsWith("-merge")) - info.setLayerType("MERGE"); - if (model.getLayerInfoByName(vertex.getVertexName()) == null) { - x++; - model.addLayer(info); - results.add(info); - } - - // now we should map connections - LayerInfo connection = model.getLayerInfoByName(input); - if (connection != null) { - connection.addConnection(info); - // log.info("Adding connection ["+ connection.getName()+"] -> ["+ info.getName()+"]"); - } else { - // the only reason to have null here, is direct input connection - //connection.addConnection(0,0); - } - } catch (Exception e) { - e.printStackTrace(); - } - } - } - } - } - return results; - } - - protected void buildModelState(Model model) { - // first we update performance state - long timeSpent = currTime - lastTime; - float timeSec = timeSpent / 1000f; - - INDArray input = model.input(); - long tadLength = Shape.getTADLength(input.shape(), ArrayUtil.range(1, input.rank())); - - long numSamples = input.lengthLong() / tadLength; - - modelState.addPerformanceSamples(numSamples / timeSec); - modelState.addPerformanceBatches(1 / timeSec); - modelState.setIterationTime(timeSpent); - - // now model score - modelState.addScore((float) model.score()); - modelState.setScore((float) model.score()); - - modelState.setTrainingTime(parseTime(System.currentTimeMillis() - initTime)); - - // and now update model params/gradients - Map newGrad = new LinkedHashMap<>(); - - Map 
newParams = new LinkedHashMap<>(); - Map params = model.paramTable(); - - Layer[] layers = null; - if (model instanceof MultiLayerNetwork) { - layers = ((MultiLayerNetwork) model).getLayers(); - } else if (model instanceof ComputationGraph) { - layers = ((ComputationGraph) model).getLayers(); - } - - List lrs = new ArrayList<>(); - if (layers != null) { - for (Layer layer : layers) { - if (layer.conf().getLayer() instanceof BaseLayer) { - lrs.add(((BaseLayer) layer.conf().getLayer()).getLearningRate()); - } else { - lrs.add(0.0); - } - - } - modelState.setLearningRates(lrs); - } - Map layerParamsMap = new LinkedHashMap<>(); - - for (Map.Entry entry : params.entrySet()) { - String param = entry.getKey(); - if (!Character.isDigit(param.charAt(0))) - continue; - - int layer = Integer.parseInt(param.replaceAll("\\_.*$", "")); - String key = param.replaceAll("^.*?_", "").toLowerCase(); - - if (!layerParamsMap.containsKey(layer)) - layerParamsMap.put(layer, new LayerParams()); - - HistogramBin histogram = - new HistogramBin.Builder(entry.getValue().dup()).setBinCount(14).setRounding(6).build(); - - // TODO: something better would be nice to have here - if (key.equalsIgnoreCase("w")) { - layerParamsMap.get(layer).setW(histogram.getData()); - } else if (key.equalsIgnoreCase("rw")) { - layerParamsMap.get(layer).setRW(histogram.getData()); - } else if (key.equalsIgnoreCase("rwf")) { - layerParamsMap.get(layer).setRWF(histogram.getData()); - } else if (key.equalsIgnoreCase("b")) { - layerParamsMap.get(layer).setB(histogram.getData()); - } - } - modelState.setLayerParams(layerParamsMap); - } - - protected ModelInfo buildModelInfo(Model model) { - ModelInfo modelInfo = new ModelInfo(); - if (model instanceof ComputationGraph) { - ComputationGraph graph = (ComputationGraph) model; - - /* - we assume that graph starts on input. every layer connected to input - is on y1 - every layer connected to y1, is on y2 etc. - */ - List inputs = graph.getConfiguration().getNetworkInputs(); - // now we need to add inputs as y0 nodes - int x = 0; - for (String input : inputs) { - GraphVertex vertex = graph.getVertex(input); - INDArray gInput = vertex.getInputs()[0]; - long tadLength = Shape.getTADLength(gInput.shape(), ArrayUtil.range(1, gInput.rank())); - - long numSamples = gInput.lengthLong() / tadLength; - - StringBuilder builder = new StringBuilder(); - builder.append("Vertex name: ").append(input).append("
"); - builder.append("Model input").append("
"); - builder.append("Input size: ").append(tadLength).append("
"); - builder.append("Batch size: ").append(numSamples).append("
"); - - LayerInfo info = new LayerInfo(); - info.setId(0); - info.setName(input); - info.setY(0); - info.setX(x); - info.setLayerType(INPUT); - info.setDescription(new Description()); - info.getDescription().setMainLine("Model input"); - info.getDescription().setText(builder.toString()); - modelInfo.addLayer(info); - x++; - } - - GraphVertex[] vertices = graph.getVertices(); - - // filling grid in LTR/TTB direction - List needle = new ArrayList<>(); - - - // we assume that max row can't be higher then total number of vertices - for (int y = 1; y < vertices.length; y++) { - if (needle.isEmpty()) - needle.addAll(inputs); - - /* - for each grid row we look for nodes, that are connected to previous layer - */ - List layersForGridY = flattenToY(modelInfo, vertices, needle, y); - - needle.clear(); - for (LayerInfo layerInfo : layersForGridY) { - needle.add(layerInfo.getName()); - } - if (needle.isEmpty()) - break; - } - - } else if (model instanceof MultiLayerNetwork) { - MultiLayerNetwork network = (MultiLayerNetwork) model; - - // manually adding input layer - - INDArray input = model.input(); - long tadLength = Shape.getTADLength(input.shape(), ArrayUtil.range(1, input.rank())); - - long numSamples = input.lengthLong() / tadLength; - - StringBuilder builder = new StringBuilder(); - builder.append("Model input").append("
"); - builder.append("Input size: ").append(tadLength).append("
"); - builder.append("Batch size: ").append(numSamples).append("
"); - - LayerInfo info = new LayerInfo(); - info.setId(0); - info.setName("Input"); - info.setY(0); - info.setX(0); - info.setLayerType(INPUT); - info.setDescription(new Description()); - info.getDescription().setMainLine("Model input"); - info.getDescription().setText(builder.toString()); - info.addConnection(0, 1); - modelInfo.addLayer(info); - - - // entry 0 is reserved for inputs - int y = 1; - - // for MLN x value is always 0 - final int x = 0; - for (Layer layer : network.getLayers()) { - LayerInfo layerInfo = getLayerInfo(layer, x, y, y); - // since it's MLN, we know connections in advance as curLayer + 1 - layerInfo.addConnection(x, y + 1); - modelInfo.addLayer(layerInfo); - y++; - } - - LayerInfo layerInfo = modelInfo.getLayerInfoByCoords(x, y - 1); - layerInfo.dropConnections(); - - } // else throw new IllegalStateException("Model ["+model.getClass().getCanonicalName()+"] doesn't looks like supported one."); - - // find layers without connections, and mark them as output layers - for (LayerInfo layerInfo : modelInfo.getLayers()) { - if (layerInfo.getConnections().size() == 0) - layerInfo.setLayerType("OUTPUT"); - } - - // now we apply colors to distinct layer types - AtomicInteger cnt = new AtomicInteger(0); - for (String layerType : modelInfo.getLayerTypes()) { - String curColor = colors.get(cnt.getAndIncrement()); - if (cnt.get() >= colors.size()) - cnt.set(0); - for (LayerInfo layerInfo : modelInfo.getLayersByType(layerType)) { - if (layerType.equals(INPUT)) { - layerInfo.setColor("#99ff66"); - } else if (layerType.equals("OUTPUT")) { - layerInfo.setColor("#e6e6e6"); - } else { - layerInfo.setColor(curColor); - } - } - } - return modelInfo; - } - - private LayerInfo getLayerInfo(Layer layer, int x, int y, int order) { - LayerInfo info = new LayerInfo(); - - - // set coordinates - info.setX(x); - info.setY(y); - - // if name was set, we should grab it - try { - info.setName(layer.conf().getLayer().getLayerName()); - } catch (Exception e) { - } - if (info.getName() == null || info.getName().isEmpty()) - info.setName("unnamed"); - - // unique layer id required here - info.setId(order); - - // set layer description according to layer params - Description description = new Description(); - info.setDescription(description); - - // set layer type - try { - info.setLayerType(layer.getClass().getSimpleName().replaceAll("Layer$", "")); - } catch (Exception e) { - info.setLayerType("n/a"); - return info; - } - - - StringBuilder mainLine = new StringBuilder(); - StringBuilder subLine = new StringBuilder(); - StringBuilder fullLine = new StringBuilder(); - - // log.info("Layer: " + info.getName() + " class: " + layer.getClass().getSimpleName()); - - if (layer.type().equals(Layer.Type.CONVOLUTIONAL)) { - org.deeplearning4j.nn.conf.layers.ConvolutionLayer layer1 = - (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) layer.conf().getLayer(); - mainLine.append("K: " + Arrays.toString(layer1.getKernelSize()) + " S: " - + Arrays.toString(layer1.getStride()) + " P: " + Arrays.toString(layer1.getPadding())); - subLine.append("nIn/nOut: [" + layer1.getNIn() + "/" + layer1.getNOut() + "]"); - fullLine.append("Kernel size: ").append(Arrays.toString(layer1.getKernelSize())).append("
"); - fullLine.append("Stride: ").append(Arrays.toString(layer1.getStride())).append("
"); - fullLine.append("Padding: ").append(Arrays.toString(layer1.getPadding())).append("
"); - fullLine.append("Inputs number: ").append(layer1.getNIn()).append("
"); - fullLine.append("Outputs number: ").append(layer1.getNOut()).append("
"); - } else if (layer.conf().getLayer() instanceof SubsamplingLayer) { - SubsamplingLayer layer1 = (SubsamplingLayer) layer.conf().getLayer(); - fullLine.append("Kernel size: ").append(Arrays.toString(layer1.getKernelSize())).append("
"); - fullLine.append("Stride: ").append(Arrays.toString(layer1.getStride())).append("
"); - fullLine.append("Padding: ").append(Arrays.toString(layer1.getPadding())).append("
"); - fullLine.append("Pooling type: ").append(layer1.getPoolingType().toString()).append("
"); - } else if (layer.conf().getLayer() instanceof FeedForwardLayer) { - FeedForwardLayer layer1 = (FeedForwardLayer) layer.conf().getLayer(); - mainLine.append("nIn/nOut: [" + layer1.getNIn() + "/" + layer1.getNOut() + "]"); - subLine.append(info.getLayerType()); - fullLine.append("Inputs number: ").append(layer1.getNIn()).append("
"); - fullLine.append("Outputs number: ").append(layer1.getNOut()).append("
"); - } else { - // TODO: Introduce Layer.Type.OUTPUT - if (layer instanceof BaseOutputLayer) { - mainLine.append("Outputs: [" + ((BaseOutputLayer) layer.conf().getLayer()).getNOut() + "]"); - fullLine.append("Outputs number: ").append(((BaseOutputLayer) layer.conf().getLayer()).getNOut()) - .append("
"); - } - } - - String afn; - if (layer.conf().getLayer() instanceof BaseLayer) { - afn = ((BaseLayer) layer.conf().getLayer()).getActivationFn().toString(); - } else { - afn = "n/a"; - } - subLine.append(" A: [").append(afn).append("]"); - fullLine.append("Activation function: ").append("").append(afn).append("").append("
"); - - description.setMainLine(mainLine.toString()); - description.setSubLine(subLine.toString()); - description.setText(fullLine.toString()); - - return info; - } - - protected String parseTime(long milliseconds) { - return String.format(FORMAT, TimeUnit.MILLISECONDS.toHours(milliseconds), - TimeUnit.MILLISECONDS.toMinutes(milliseconds) - - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(milliseconds)), - TimeUnit.MILLISECONDS.toSeconds(milliseconds) - - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(milliseconds))); - } -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui-remote-iterationlisteners/src/main/java/org/deeplearning4j/ui/weights/RemoteHistogramIterationListener.java b/deeplearning4j-ui-parent/deeplearning4j-ui-remote-iterationlisteners/src/main/java/org/deeplearning4j/ui/weights/RemoteHistogramIterationListener.java deleted file mode 100644 index 566f71888605..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui-remote-iterationlisteners/src/main/java/org/deeplearning4j/ui/weights/RemoteHistogramIterationListener.java +++ /dev/null @@ -1,206 +0,0 @@ -package org.deeplearning4j.ui.weights; - - -import lombok.NonNull; -import org.deeplearning4j.nn.api.Model; -import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.ui.UiConnectionInfo; -import org.deeplearning4j.ui.weights.beans.CompactModelAndGradient; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.ws.rs.client.Client; -import javax.ws.rs.client.ClientBuilder; -import javax.ws.rs.client.Entity; -import javax.ws.rs.client.WebTarget; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; -import java.util.*; - -/** - * - * A histogram iteration listener that - * updates the weights of the model - * with a web based ui. 
- * - * @author Adam Gibson - */ -public class RemoteHistogramIterationListener implements IterationListener { - private static final Logger log = LoggerFactory.getLogger(RemoteHistogramIterationListener.class); - private Client client = ClientBuilder.newClient(); - private WebTarget target; - private int iterations = 1; - private int curIteration = 0; - private ArrayList scoreHistory = new ArrayList<>(); - private List>> meanMagHistoryParams = new ArrayList<>(); //1 map per layer; keyed by new param name - private List>> meanMagHistoryUpdates = new ArrayList<>(); - private Map layerNameIndexes = new HashMap<>(); - private List layerNames = new ArrayList<>(); - private int layerNameIndexesCount = 0; - private boolean firstIteration = true; - private String path; - private String subPath = "weights"; - private UiConnectionInfo connectionInfo; - - public RemoteHistogramIterationListener(@NonNull UiConnectionInfo connection, int iterations) { - target = client.target(connection.getFirstPart()).path(connection.getSecondPart(subPath)).path("update") - .queryParam("sid", connection.getSessionId()); - this.connectionInfo = connection; - - this.iterations = iterations; - - System.out.println("UI Histogram URL: " + connection.getFullAddress()); - } - - - @Override - public boolean invoked() { - return false; - } - - @Override - public void invoke() { - - } - - @Override - public void iterationDone(Model model, int iteration) { - if (curIteration % iterations == 0) { - Map newGrad = new LinkedHashMap<>(); - try { - Map grad = model.gradient().gradientForVariable(); - - // log.warn("Starting report building..."); - - if (meanMagHistoryParams.isEmpty()) { - //Initialize: - int maxLayerIdx = -1; - for (String s : grad.keySet()) { - maxLayerIdx = Math.max(maxLayerIdx, indexFromString(s)); - } - if (maxLayerIdx == -1) - maxLayerIdx = 0; - for (int i = 0; i <= maxLayerIdx; i++) { - meanMagHistoryParams.add(new LinkedHashMap>()); - meanMagHistoryUpdates.add(new LinkedHashMap>()); - } - } - - //Process gradients: duplicate + calculate and store mean magnitudes - - for (Map.Entry entry : grad.entrySet()) { - String param = entry.getKey(); - String newName; - if (Character.isDigit(param.charAt(0))) - newName = "param_" + param; - else - newName = param; - HistogramBin histogram = new HistogramBin.Builder(entry.getValue().dup()).setBinCount(20) - .setRounding(6).build(); - newGrad.put(newName, histogram.getData()); - //CSS identifier can't start with digit http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier - - - int idx = indexFromString(param); - if (idx >= meanMagHistoryUpdates.size()) { - //log.info("Can't find idx for update ["+newName+"]"); - meanMagHistoryUpdates.add(new LinkedHashMap>()); - } - - - //Work out layer index: - Map> map = meanMagHistoryUpdates.get(idx); - List list = map.get(newName); - if (list == null) { - list = new ArrayList<>(); - map.put(newName, list); - } - double meanMag = entry.getValue().norm1Number().doubleValue() / entry.getValue().length(); - list.add(meanMag); - } - } catch (Exception e) { - log.warn("Skipping gradients update"); - } - - //Process parameters: duplicate + calculate and store mean magnitudes - Map params = model.paramTable(); - Map newParams = new LinkedHashMap<>(); - for (Map.Entry entry : params.entrySet()) { - String param = entry.getKey(); - String newName; - if (Character.isDigit(param.charAt(0))) - newName = "param_" + param; - else - newName = param; - - HistogramBin histogram = - new 
HistogramBin.Builder(entry.getValue().dup()).setBinCount(20).setRounding(6).build(); - newParams.put(newName, histogram.getData()); - //dup() because params might be a view - - int idx = indexFromString(param); - if (idx >= meanMagHistoryParams.size()) { - //log.info("Can't find idx for param ["+newName+"]"); - meanMagHistoryParams.add(new LinkedHashMap>()); - } - - Map> map = meanMagHistoryParams.get(idx); - List list = map.get(newName); - if (list == null) { - list = new ArrayList<>(); - map.put(newName, list); - } - double meanMag = entry.getValue().norm1Number().doubleValue() / entry.getValue().length(); - list.add(meanMag); - } - - - double score = model.score(); - scoreHistory.add(score); - //log.info("Saving score: " + score); - - CompactModelAndGradient g = new CompactModelAndGradient(); - g.setGradients(newGrad); - g.setParameters(newParams); - g.setScore(score); - g.setScores(scoreHistory); - g.setPath(subPath); - g.setUpdateMagnitudes(meanMagHistoryUpdates); - g.setParamMagnitudes(meanMagHistoryParams); - g.setLayerNames(layerNames); - g.setLastUpdateTime(System.currentTimeMillis()); - - - Response resp = target.request(MediaType.APPLICATION_JSON).accept(MediaType.APPLICATION_JSON) - .post(Entity.entity(g, MediaType.APPLICATION_JSON)); - log.debug("{}", resp); - - if (firstIteration) { - StringBuilder builder = new StringBuilder(connectionInfo.getFullAddress()); - builder.append(subPath).append("?sid=").append(connectionInfo.getSessionId()); - firstIteration = false; - } - } - - curIteration += 1; - } - - private int indexFromString(String str) { - int underscore = str.indexOf('_'); - if (underscore == -1) { - if (!layerNameIndexes.containsKey(str)) { - layerNames.add(str); - layerNameIndexes.put(str, layerNameIndexesCount++); - } - return layerNameIndexes.get(str); - } else { - String subStr = str.substring(0, underscore); - if (!layerNameIndexes.containsKey(subStr)) { - layerNames.add(subStr); - layerNameIndexes.put(subStr, layerNameIndexesCount++); - } - return layerNameIndexes.get(subStr); - } - } -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui/src/main/java/org/deeplearning4j/ui/flow/FlowIterationListener.java b/deeplearning4j-ui-parent/deeplearning4j-ui/src/main/java/org/deeplearning4j/ui/flow/FlowIterationListener.java deleted file mode 100644 index b4ae6bd26b93..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui/src/main/java/org/deeplearning4j/ui/flow/FlowIterationListener.java +++ /dev/null @@ -1,566 +0,0 @@ -package org.deeplearning4j.ui.flow; - -import lombok.NonNull; -import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.api.storage.Persistable; -import org.deeplearning4j.api.storage.StatsStorage; -import org.deeplearning4j.api.storage.StatsStorageRouter; -import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Model; -import org.deeplearning4j.nn.conf.layers.BaseLayer; -import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; -import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; -import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; -import org.deeplearning4j.nn.graph.ComputationGraph; -import org.deeplearning4j.nn.graph.vertex.GraphVertex; -import org.deeplearning4j.nn.graph.vertex.VertexIndices; -import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.ui.UiConnectionInfo; -import org.deeplearning4j.ui.UiUtils; -import org.deeplearning4j.ui.api.UIServer; -import org.deeplearning4j.ui.flow.beans.*; -import 
org.deeplearning4j.ui.flow.data.FlowStaticPersistable; -import org.deeplearning4j.ui.flow.data.FlowUpdatePersistable; -import org.deeplearning4j.ui.stats.StatsListener; -import org.deeplearning4j.ui.storage.mapdb.MapDBStatsStorage; -import org.deeplearning4j.ui.weights.HistogramBin; -import org.deeplearning4j.util.UIDProvider; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.shape.Shape; -import org.nd4j.linalg.util.ArrayUtil; - -import java.util.*; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; - -/** - * This IterationListener is suited for general model performance/architecture overview - * - * @deprecated Use {@link StatsListener} and {@link UIServer#attach(StatsStorage)}. See examples repo for how. - * - * @author raver119@gmail.com - */ -@Slf4j -@Deprecated -public class FlowIterationListener implements IterationListener { - - private static final String FORMAT = "%02d:%02d:%02d"; - public static final String INPUT = "INPUT"; - private int frequency = 1; - private boolean firstIteration = true; - private ModelState modelState = new ModelState(); - - private AtomicLong iterationCount = new AtomicLong(0); - - - private long lastTime = System.currentTimeMillis(); - private long currTime; - private long initTime = System.currentTimeMillis(); - - private static final List colors = Collections.unmodifiableList( - Arrays.asList("#9966ff", "#ff9933", "#ffff99", "#3366ff", "#0099cc", "#669999", "#66ffff")); - - private final StatsStorageRouter ssr; - private final String sessionID; - private final String workerID; - private boolean openBrowser; - - protected FlowIterationListener() { - this(1); - // please keep this constructor protected - } - - /** - * Creates IterationListener and attaches it local UIServer instance - * - * @param frequency update frequency - */ - public FlowIterationListener(int frequency) { - this(new MapDBStatsStorage(), frequency, null, null, true); - } - - @Deprecated - public FlowIterationListener(@NonNull String address, int port, int frequency) { - this(frequency); - } - - public FlowIterationListener(StatsStorageRouter ssr, int frequency, String sessionID, String workerID, - boolean openBrowser) { - this.frequency = frequency; - - this.ssr = ssr; - if (sessionID == null) { - this.sessionID = UUID.randomUUID().toString(); - } else { - this.sessionID = sessionID; - } - if (workerID == null) { - this.workerID = UIDProvider.getJVMUID() + "_" + Thread.currentThread().getId(); - } else { - this.workerID = workerID; - } - - this.openBrowser = openBrowser; - if (ssr instanceof StatsStorage && openBrowser) { - UIServer.getInstance().attach((StatsStorage) ssr); - } - - System.out.println( - "FlowIterationListener path: http://localhost:" + UIServer.getInstance().getPort() + "/flow"); - } - - - @Deprecated - public FlowIterationListener(@NonNull UiConnectionInfo connectionInfo, int frequency) { - this(frequency); - } - - /** - * Get if listener invoked - */ - @Override - public boolean invoked() { - return false; - } - - /** - * Change invoke to true - */ - @Override - public void invoke() { - - } - - /** - * Event listener for each iteration - * - * @param model the model iterating - * @param iteration the iteration - */ - @Override - public synchronized void iterationDone(Model model, int iteration) { - if (iterationCount.incrementAndGet() % frequency == 0) { - currTime = System.currentTimeMillis(); - /* - Basic plan: - 1. 
We should detect, if that's CompGraph or MultilayerNetwork. However the actual difference will be limited to number of non-linear connections. - 2. Network structure should be converted to JSON - 3. Params for each node should be packed to JSON as well - 4. For specific cases (like CNN) binary data should be wrapped into base64 - 5. For arrays/params gzip could be used (to be investigated) - ...... - Later, on client side, this JSON should be parsed and rendered. So, proper object structure to be considered. - */ - - if (firstIteration) { - // On first pass we just build list of layers. However, for MultiLayerNetwork first pass is the last pass, since we know connections in advance - ModelInfo info = buildModelInfo(model); - - // send ModelInfo to stats storage - Persistable staticInfo = - new FlowStaticPersistable(sessionID, workerID, System.currentTimeMillis(), info); - ssr.putStaticInfo(staticInfo); - } - - - // update modelState - buildModelState(model); - Persistable updateInfo = - new FlowUpdatePersistable(sessionID, workerID, System.currentTimeMillis(), modelState); - ssr.putUpdate(updateInfo); - - - if (firstIteration && openBrowser) { - UIServer uiServer = UIServer.getInstance(); - String path = "http://localhost:" + uiServer.getPort() + "/flow?sid=" + sessionID; - try { - UiUtils.tryOpenBrowser(path, log); - } catch (Exception e) { - } - firstIteration = false; - } - } - - lastTime = System.currentTimeMillis(); - } - - /** - * This method returns all Layers connected to the currentInput - * - * @param vertices - * @param currentInput - * @param currentY - * @return - */ - protected List flattenToY(ModelInfo model, GraphVertex[] vertices, List currentInput, - int currentY) { - List results = new ArrayList<>(); - int x = 0; - for (int v = 0; v < vertices.length; v++) { - GraphVertex vertex = vertices[v]; - VertexIndices[] indices = vertex.getInputVertices(); - - if (indices != null) - for (int i = 0; i < indices.length; i++) { - GraphVertex cv = vertices[indices[i].getVertexIndex()]; - String inputName = cv.getVertexName(); - - for (String input : currentInput) { - if (inputName.equals(input)) { - // we have match for Vertex - // log.info("Vertex: " + vertex.getVertexName() + " has Input: " + input); - try { - LayerInfo info = model.getLayerInfoByName(vertex.getVertexName()); - if (info == null) - info = getLayerInfo(vertex.getLayer(), x, currentY, 121); - info.setName(vertex.getVertexName()); - - // special case here: vertex isn't a layer - if (vertex.getLayer() == null) { - info.setLayerType(vertex.getClass().getSimpleName()); - } - if (info.getName().endsWith("-merge")) - info.setLayerType("MERGE"); - if (model.getLayerInfoByName(vertex.getVertexName()) == null) { - x++; - model.addLayer(info); - results.add(info); - } - - // now we should map connections - LayerInfo connection = model.getLayerInfoByName(input); - if (connection != null) { - connection.addConnection(info); - // log.info("Adding connection ["+ connection.getName()+"] -> ["+ info.getName()+"]"); - } else { - // the only reason to have null here, is direct input connection - //connection.addConnection(0,0); - } - } catch (Exception e) { - e.printStackTrace(); - } - } - } - } - } - return results; - } - - protected void buildModelState(Model model) { - // first we update performance state - long timeSpent = currTime - lastTime; - float timeSec = timeSpent / 1000f; - - INDArray input = model.input(); - long tadLength = Shape.getTADLength(input.shape(), ArrayUtil.range(1, input.rank())); - - long numSamples = 
input.lengthLong() / tadLength; - - modelState.addPerformanceSamples(numSamples / timeSec); - modelState.addPerformanceBatches(1 / timeSec); - modelState.setIterationTime(timeSpent); - - // now model score - modelState.addScore((float) model.score()); - modelState.setScore((float) model.score()); - - modelState.setTrainingTime(parseTime(System.currentTimeMillis() - initTime)); - - // and now update model params/gradients - Map newGrad = new LinkedHashMap<>(); - - Map newParams = new LinkedHashMap<>(); - Map params = model.paramTable(); - - Layer[] layers = null; - if (model instanceof MultiLayerNetwork) { - layers = ((MultiLayerNetwork) model).getLayers(); - } else if (model instanceof ComputationGraph) { - layers = ((ComputationGraph) model).getLayers(); - } - - List lrs = new ArrayList<>(); - if (layers != null) { - for (Layer layer : layers) { - if (layer.conf().getLayer() instanceof BaseLayer) { - lrs.add(((BaseLayer) layer.conf().getLayer()).getLearningRate()); - } else { - lrs.add(0.0); - } - } - modelState.setLearningRates(lrs); - } - Map layerParamsMap = new LinkedHashMap<>(); - - for (Map.Entry entry : params.entrySet()) { - String param = entry.getKey(); - if (!Character.isDigit(param.charAt(0))) - continue; - - int layer = Integer.parseInt(param.replaceAll("\\_.*$", "")); - String key = param.replaceAll("^.*?_", "").toLowerCase(); - - if (!layerParamsMap.containsKey(layer)) - layerParamsMap.put(layer, new LayerParams()); - - HistogramBin histogram = - new HistogramBin.Builder(entry.getValue().dup()).setBinCount(14).setRounding(6).build(); - - // TODO: something better would be nice to have here - if (key.equalsIgnoreCase("w")) { - layerParamsMap.get(layer).setW(histogram.getData()); - } else if (key.equalsIgnoreCase("rw")) { - layerParamsMap.get(layer).setRW(histogram.getData()); - } else if (key.equalsIgnoreCase("rwf")) { - layerParamsMap.get(layer).setRWF(histogram.getData()); - } else if (key.equalsIgnoreCase("b")) { - layerParamsMap.get(layer).setB(histogram.getData()); - } - } - modelState.setLayerParams(layerParamsMap); - } - - protected ModelInfo buildModelInfo(Model model) { - ModelInfo modelInfo = new ModelInfo(); - if (model instanceof ComputationGraph) { - ComputationGraph graph = (ComputationGraph) model; - - /* - we assume that graph starts on input. every layer connected to input - is on y1 - every layer connected to y1, is on y2 etc. - */ - List inputs = graph.getConfiguration().getNetworkInputs(); - // now we need to add inputs as y0 nodes - int x = 0; - for (String input : inputs) { - GraphVertex vertex = graph.getVertex(input); - - long numSamples; - long tadLength; - if (vertex.getInputs() == null || vertex.getInputs().length == 0) { - numSamples = 0; - tadLength = 0; - } else { - INDArray gInput = vertex.getInputs()[0]; - tadLength = Shape.getTADLength(gInput.shape(), ArrayUtil.range(1, gInput.rank())); - numSamples = gInput.lengthLong() / tadLength; - } - - - - StringBuilder builder = new StringBuilder(); - builder.append("Vertex name: ").append(input).append("
"); - builder.append("Model input").append("
"); - builder.append("Input size: ").append(tadLength).append("
"); - builder.append("Batch size: ").append(numSamples).append("
"); - - LayerInfo info = new LayerInfo(); - info.setId(0); - info.setName(input); - info.setY(0); - info.setX(x); - info.setLayerType(INPUT); - info.setDescription(new Description()); - info.getDescription().setMainLine("Model input"); - info.getDescription().setText(builder.toString()); - modelInfo.addLayer(info); - x++; - } - - GraphVertex[] vertices = graph.getVertices(); - - // filling grid in LTR/TTB direction - List needle = new ArrayList<>(); - - - // we assume that max row can't be higher then total number of vertices - for (int y = 1; y < vertices.length; y++) { - if (needle.isEmpty()) - needle.addAll(inputs); - - /* - for each grid row we look for nodes, that are connected to previous layer - */ - List layersForGridY = flattenToY(modelInfo, vertices, needle, y); - - needle.clear(); - for (LayerInfo layerInfo : layersForGridY) { - needle.add(layerInfo.getName()); - } - if (needle.isEmpty()) - break; - } - - } else if (model instanceof MultiLayerNetwork) { - MultiLayerNetwork network = (MultiLayerNetwork) model; - - // manually adding input layer - - INDArray input = model.input(); - long tadLength = Shape.getTADLength(input.shape(), ArrayUtil.range(1, input.rank())); - - long numSamples = input.lengthLong() / tadLength; - - StringBuilder builder = new StringBuilder(); - builder.append("Model input").append("
"); - builder.append("Input size: ").append(tadLength).append("
"); - builder.append("Batch size: ").append(numSamples).append("
"); - - LayerInfo info = new LayerInfo(); - info.setId(0); - info.setName("Input"); - info.setY(0); - info.setX(0); - info.setLayerType(INPUT); - info.setDescription(new Description()); - info.getDescription().setMainLine("Model input"); - info.getDescription().setText(builder.toString()); - info.addConnection(0, 1); - modelInfo.addLayer(info); - - - // entry 0 is reserved for inputs - int y = 1; - - // for MLN x value is always 0 - final int x = 0; - for (Layer layer : network.getLayers()) { - LayerInfo layerInfo = getLayerInfo(layer, x, y, y); - // since it's MLN, we know connections in advance as curLayer + 1 - layerInfo.addConnection(x, y + 1); - modelInfo.addLayer(layerInfo); - y++; - } - - LayerInfo layerInfo = modelInfo.getLayerInfoByCoords(x, y - 1); - layerInfo.dropConnections(); - - } // else throw new IllegalStateException("Model ["+model.getClass().getCanonicalName()+"] doesn't looks like supported one."); - - // find layers without connections, and mark them as output layers - for (LayerInfo layerInfo : modelInfo.getLayers()) { - if (layerInfo.getConnections().size() == 0) - layerInfo.setLayerType("OUTPUT"); - } - - // now we apply colors to distinct layer types - AtomicInteger cnt = new AtomicInteger(0); - for (String layerType : modelInfo.getLayerTypes()) { - String curColor = colors.get(cnt.getAndIncrement()); - if (cnt.get() >= colors.size()) - cnt.set(0); - for (LayerInfo layerInfo : modelInfo.getLayersByType(layerType)) { - if (layerType.equals(INPUT)) { - layerInfo.setColor("#99ff66"); - } else if (layerType.equals("OUTPUT")) { - layerInfo.setColor("#e6e6e6"); - } else { - layerInfo.setColor(curColor); - } - } - } - return modelInfo; - } - - private LayerInfo getLayerInfo(Layer layer, int x, int y, int order) { - LayerInfo info = new LayerInfo(); - - - // set coordinates - info.setX(x); - info.setY(y); - - // if name was set, we should grab it - try { - info.setName(layer.conf().getLayer().getLayerName()); - } catch (Exception e) { - } - if (info.getName() == null || info.getName().isEmpty()) - info.setName("unnamed"); - - // unique layer id required here - info.setId(order); - - // set layer description according to layer params - Description description = new Description(); - info.setDescription(description); - - // set layer type - try { - info.setLayerType(layer.getClass().getSimpleName().replaceAll("Layer$", "")); - } catch (Exception e) { - info.setLayerType("n/a"); - return info; - } - - - StringBuilder mainLine = new StringBuilder(); - StringBuilder subLine = new StringBuilder(); - StringBuilder fullLine = new StringBuilder(); - - // log.info("Layer: " + info.getName() + " class: " + layer.getClass().getSimpleName()); - - if (layer.type().equals(Layer.Type.CONVOLUTIONAL)) { - org.deeplearning4j.nn.conf.layers.ConvolutionLayer layer1 = - (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) layer.conf().getLayer(); - mainLine.append("K: " + Arrays.toString(layer1.getKernelSize()) + " S: " - + Arrays.toString(layer1.getStride()) + " P: " + Arrays.toString(layer1.getPadding())); - subLine.append("nIn/nOut: [" + layer1.getNIn() + "/" + layer1.getNOut() + "]"); - fullLine.append("Kernel size: ").append(Arrays.toString(layer1.getKernelSize())).append("
"); - fullLine.append("Stride: ").append(Arrays.toString(layer1.getStride())).append("
"); - fullLine.append("Padding: ").append(Arrays.toString(layer1.getPadding())).append("
"); - fullLine.append("Inputs number: ").append(layer1.getNIn()).append("
"); - fullLine.append("Outputs number: ").append(layer1.getNOut()).append("
"); - } else if (layer.conf().getLayer() instanceof SubsamplingLayer) { - SubsamplingLayer layer1 = (SubsamplingLayer) layer.conf().getLayer(); - fullLine.append("Kernel size: ").append(Arrays.toString(layer1.getKernelSize())).append("
"); - fullLine.append("Stride: ").append(Arrays.toString(layer1.getStride())).append("
"); - fullLine.append("Padding: ").append(Arrays.toString(layer1.getPadding())).append("
"); - fullLine.append("Pooling type: ").append(layer1.getPoolingType().toString()).append("
"); - } else if (layer.conf().getLayer() instanceof FeedForwardLayer) { - org.deeplearning4j.nn.conf.layers.FeedForwardLayer layer1 = - (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) layer.conf().getLayer(); - mainLine.append("nIn/nOut: [" + layer1.getNIn() + "/" + layer1.getNOut() + "]"); - subLine.append(info.getLayerType()); - fullLine.append("Inputs number: ").append(layer1.getNIn()).append("
"); - fullLine.append("Outputs number: ").append(layer1.getNOut()).append("
"); - } else { - // TODO: Introduce Layer.Type.OUTPUT - if (layer instanceof BaseOutputLayer) { - mainLine.append("Outputs: [" - + ((org.deeplearning4j.nn.conf.layers.BaseOutputLayer) layer.conf().getLayer()) - .getNOut() - + "]"); - fullLine.append("Outputs number: ").append( - ((org.deeplearning4j.nn.conf.layers.BaseOutputLayer) layer.conf().getLayer()).getNOut()) - .append("
"); - } - } - - String afn; - if (layer.conf().getLayer() instanceof BaseLayer) { - afn = ((BaseLayer) layer.conf().getLayer()).getActivationFn().toString(); - } else { - afn = "n/a"; - } - - subLine.append(" A: [").append(afn).append("]"); - fullLine.append("Activation function: ").append("").append(afn).append("").append("
"); - - description.setMainLine(mainLine.toString()); - description.setSubLine(subLine.toString()); - description.setText(fullLine.toString()); - - return info; - } - - protected String parseTime(long milliseconds) { - return String.format(FORMAT, TimeUnit.MILLISECONDS.toHours(milliseconds), - TimeUnit.MILLISECONDS.toMinutes(milliseconds) - - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(milliseconds)), - TimeUnit.MILLISECONDS.toSeconds(milliseconds) - - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(milliseconds))); - } -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui/src/main/java/org/deeplearning4j/ui/weights/HistogramIterationListener.java b/deeplearning4j-ui-parent/deeplearning4j-ui/src/main/java/org/deeplearning4j/ui/weights/HistogramIterationListener.java deleted file mode 100644 index ef36a7355d64..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui/src/main/java/org/deeplearning4j/ui/weights/HistogramIterationListener.java +++ /dev/null @@ -1,83 +0,0 @@ -package org.deeplearning4j.ui.weights; - - -import lombok.NonNull; -import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.api.storage.StatsStorage; -import org.deeplearning4j.nn.api.Model; -import org.deeplearning4j.ui.UiConnectionInfo; -import org.deeplearning4j.ui.UiUtils; -import org.deeplearning4j.ui.api.UIServer; -import org.deeplearning4j.ui.stats.StatsListener; -import org.deeplearning4j.ui.stats.api.StatsUpdateConfiguration; -import org.deeplearning4j.ui.stats.impl.DefaultStatsUpdateConfiguration; -import org.deeplearning4j.ui.storage.InMemoryStatsStorage; - -/** - * - * A histogram iteration listener that updates the weights of the model with a web based ui. - * - * @deprecated Use {@link StatsListener} and {@link UIServer#attach(StatsStorage)}. See examples repo for how. 
- * - * @author Adam Gibson - */ -@Deprecated -@Slf4j -public class HistogramIterationListener extends StatsListener { - private boolean openBrowser; - private boolean firstIteration = true; - private String path; - private static final String subPath = "weights"; - - public HistogramIterationListener(@NonNull UiConnectionInfo connection, int iterations) { - this(new InMemoryStatsStorage(), iterations, true); - - } - - public HistogramIterationListener(int iterations) { - this(iterations, true); - } - - public HistogramIterationListener(int iterations, boolean openBrowser) { - this(new InMemoryStatsStorage(), iterations, openBrowser); - } - - public HistogramIterationListener(StatsStorage ssr, int iterations, boolean openBrowser) { - super(ssr, null, getUpdateConfiguration(iterations), null, null); - int port = -1; - try { - UIServer server = UIServer.getInstance(); - port = server.getPort(); - } catch (Exception e) { - log.error("Error initializing UI server", e); - throw new RuntimeException(e); - } - - UIServer.getInstance().attach(ssr); - - this.path = "http://localhost:" + port + "/" + subPath; - this.openBrowser = openBrowser; - - System.out.println("UI Histogram URL: " + this.path); - } - - @Override - public void iterationDone(Model model, int iteration) { - super.iterationDone(model, iteration); - - if (openBrowser && firstIteration) { - StringBuilder builder = - new StringBuilder("http://localhost:").append(UIServer.getInstance().getPort()).append("/"); ///connectionInfo.getFullAddress()); - builder.append(subPath).append("?sid=").append(super.getSessionID()); - UiUtils.tryOpenBrowser(builder.toString(), log); - firstIteration = false; - } - } - - - private static StatsUpdateConfiguration getUpdateConfiguration(int iterations) { - //Note: we don't *need* all of these stats just for histogram listener - but other info - // is still available at /train - return new DefaultStatsUpdateConfiguration.Builder().reportingFrequency(iterations).build(); - } -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/ManualTests.java b/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/ManualTests.java index cb76f20727f8..2268a4e5d41c 100644 --- a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/ManualTests.java +++ b/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/ManualTests.java @@ -9,13 +9,15 @@ import org.deeplearning4j.models.embeddings.reader.impl.BasicModelUtils; import org.deeplearning4j.models.word2vec.VocabWord; import org.deeplearning4j.models.word2vec.Word2Vec; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; +import org.deeplearning4j.nn.conf.weightnoise.DropConnect; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.listeners.ScoreIterationListener; @@ -26,9 +28,7 @@ import 
org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory; import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory; import org.deeplearning4j.ui.api.UIServer; -import org.deeplearning4j.ui.flow.FlowIterationListener; import org.deeplearning4j.ui.weights.ConvolutionalIterationListener; -import org.deeplearning4j.ui.weights.HistogramIterationListener; import org.junit.Ignore; import org.junit.Test; import org.nd4j.linalg.activations.Activation; @@ -40,6 +40,8 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.AdaGrad; +import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -108,67 +110,6 @@ public void testTsne() throws Exception { Thread.sleep(10000000000L); } - @Test - public void testHistograms() throws Exception { - final int numRows = 28; - final int numColumns = 28; - int outputNum = 10; - int numSamples = 60000; - int batchSize = 100; - int iterations = 10; - int seed = 123; - int listenerFreq = batchSize / 5; - - log.info("Load data...."); - DataSetIterator iter = new MnistDataSetIterator(batchSize, numSamples, true); - - log.info("Build model...."); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed) - .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(1.0).iterations(iterations).momentum(0.5) - .momentumAfter(Collections.singletonMap(3, 0.9)) - .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() - .layer(0, new RBM.Builder().nIn(numRows * numColumns).nOut(500).weightInit(WeightInit.XAVIER) - .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE) - .visibleUnit(RBM.VisibleUnit.BINARY).hiddenUnit(RBM.HiddenUnit.BINARY).build()) - .layer(1, new RBM.Builder().nIn(500).nOut(250).weightInit(WeightInit.XAVIER) - .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE) - .visibleUnit(RBM.VisibleUnit.BINARY).hiddenUnit(RBM.HiddenUnit.BINARY).build()) - .layer(2, new RBM.Builder().nIn(250).nOut(200).weightInit(WeightInit.XAVIER) - .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE) - .visibleUnit(RBM.VisibleUnit.BINARY).hiddenUnit(RBM.HiddenUnit.BINARY).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .activation(Activation.SOFTMAX).nIn(200).nOut(outputNum).build()) - .pretrain(true).backprop(false).build(); - - // UiServer server = UiServer.getInstance(); - // UiConnectionInfo connectionInfo = server.getConnectionInfo(); - // connectionInfo.setSessionId("my session here"); - - MultiLayerNetwork model = new MultiLayerNetwork(conf); - model.init(); - model.setListeners(Arrays.asList(new ScoreIterationListener(listenerFreq), - new HistogramIterationListener(listenerFreq), new FlowIterationListener(listenerFreq))); - - log.info("Train model...."); - model.fit(iter); // achieves end to end pre-training - - log.info("Evaluate model...."); - Evaluation eval = new Evaluation(outputNum); - - DataSetIterator testIter = new MnistDataSetIterator(100, 10000); - while (testIter.hasNext()) { - DataSet testMnist = testIter.next(); - INDArray predict2 = model.output(testMnist.getFeatureMatrix()); - eval.eval(testMnist.getLabels(), predict2); - } - - log.info(eval.stats()); - log.info("****************Example finished********************"); - - fail("Not implemented"); - } - 
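The removed testHistograms() exercised the deprecated HistogramIterationListener and FlowIterationListener; both deprecation notes in this patch point to StatsListener plus UIServer#attach(StatsStorage) as the replacement. A minimal sketch of that wiring, using the classes already imported by the deleted listeners and assuming StatsListener's single-argument constructor that takes the storage (the network variable and the listener frequency of 10 are illustrative):

import org.deeplearning4j.api.storage.StatsStorage;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.deeplearning4j.ui.api.UIServer;
import org.deeplearning4j.ui.stats.StatsListener;
import org.deeplearning4j.ui.storage.InMemoryStatsStorage;

public class TrainingUiSketch {

    /** Attaches the DL4J training UI to a network in place of the removed histogram/flow listeners. */
    public static void attachTrainingUi(MultiLayerNetwork net) {
        // Stats are routed into a StatsStorage instance; InMemoryStatsStorage keeps them in this JVM only.
        StatsStorage statsStorage = new InMemoryStatsStorage();

        // The UI server renders whatever storage is attached to it (the /train page mentioned in the deleted listener).
        UIServer.getInstance().attach(statsStorage);

        // StatsListener pushes per-iteration stats into the storage; ScoreIterationListener still logs the score.
        net.setListeners(new StatsListener(statsStorage), new ScoreIterationListener(10));
    }
}

The same pattern applies to a ComputationGraph through its setListeners method, as the remaining tests in this file do for their other listeners.
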
/** * This test is for manual execution only, since it's here just to get working CNN and visualize it's layers * @@ -201,8 +142,7 @@ public void testCNNActivationsVisualization() throws Exception { MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) .activation(Activation.RELU).weightInit(WeightInit.XAVIER) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.01) - .momentum(0.9).regularization(true).updater(Updater.ADAGRAD).useDropConnect(true).list() + .updater(new AdaGrad(0.01)).weightNoise(new DropConnect(0.5)).list() .layer(0, new ConvolutionLayer.Builder(4, 4).name("cnn1").nIn(nChannels).stride(1, 1).nOut(20) .build()) .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) @@ -251,71 +191,6 @@ public void testCNNActivationsVisualization() throws Exception { } - @Test - public void testFlowActivationsMLN1() throws Exception { - int nChannels = 1; - int outputNum = 10; - int batchSize = 64; - int nEpochs = 10; - int iterations = 1; - int seed = 123; - - log.info("Load data...."); - DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, true, 12345); - DataSetIterator mnistTest = new MnistDataSetIterator(batchSize, false, 12345); - - log.info("Build model...."); - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) - .regularization(true).l2(0.0005).learningRate(0.01)//.biasLearningRate(0.02) - //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) - .weightInit(WeightInit.XAVIER) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NESTEROVS) - .momentum(0.9).list() - .layer(0, new ConvolutionLayer.Builder(5, 5) - //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied - .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) - .stride(2, 2).build()) - .layer(2, new ConvolutionLayer.Builder(5, 5) - //Note that nIn needed be specified in later layers - .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build()) - .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) - .stride(2, 2).build()) - .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nOut(outputNum).activation(Activation.SOFTMAX).build()) - .backprop(true).pretrain(false).setInputType(InputType.convolutional(28, 28, nChannels)); - - MultiLayerConfiguration conf = builder.build(); - MultiLayerNetwork model = new MultiLayerNetwork(conf); - model.init(); - - - log.info("Train model...."); - model.setListeners(new FlowIterationListener(1)); - for (int i = 0; i < nEpochs; i++) { - model.fit(mnistTrain); - log.info("*** Completed epoch {} ***", i); - mnistTest.reset(); - } - - log.info("Evaluate model...."); - Evaluation eval = new Evaluation(outputNum); - while (mnistTest.hasNext()) { - DataSet ds = mnistTest.next(); - INDArray output = model.output(ds.getFeatureMatrix(), false); - eval.eval(ds.getLabels(), output); - } - log.info(eval.stats()); - - log.info("****************Example finished********************"); - } - - @Test - public void testFlowActivationsCG1() throws Exception { - - } - @Test public void testWord2VecPlot() throws Exception { File inputFile = new ClassPathResource("/big/raw_sentences.txt").getFile(); @@ -387,11 +262,9 @@ public void testCNNActivations2() throws Exception { log.info("Build model...."); MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) - .regularization(true).l2(0.0005).learningRate(0.01)//.biasLearningRate(0.02) - //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) + .l2(0.0005) .weightInit(WeightInit.XAVIER) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NESTEROVS) - .momentum(0.9).list() + .updater(new Nesterovs(0.01, 0.9)).list() .layer(0, new ConvolutionLayer.Builder(5, 5) //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/TestRenders.java b/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/TestRenders.java deleted file mode 100644 index 9735d863c208..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/TestRenders.java +++ /dev/null @@ -1,155 +0,0 @@ -package org.deeplearning4j.ui; - -import org.deeplearning4j.datasets.fetchers.MnistDataFetcher; -import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; -import org.deeplearning4j.nn.conf.layers.OutputLayer; -import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; -import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; -import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToCnnPreProcessor; -import org.deeplearning4j.nn.graph.ComputationGraph; -import org.deeplearning4j.nn.layers.feedforward.autoencoder.AutoEncoder; -import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.optimize.api.IterationListener; -import org.deeplearning4j.optimize.listeners.ScoreIterationListener; -import org.deeplearning4j.ui.weights.HistogramIterationListener; -import org.junit.Ignore; -import org.junit.Test; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.dataset.DataSet; -import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.lossfunctions.LossFunctions; - -import java.util.Arrays; - - -/** - * @author Adam Gibson - */ -@Ignore -public class TestRenders extends BaseUiServerTest { - - @Test - public void renderHistogram() throws Exception { - MnistDataFetcher fetcher = new MnistDataFetcher(true); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().momentum(0.9f) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(100) - .learningRate(1e-1f) - .layer(new org.deeplearning4j.nn.conf.layers.AutoEncoder.Builder().nIn(784).nOut(600) - .corruptionLevel(0.6).weightInit(WeightInit.XAVIER) - .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) - .build(); - - - fetcher.fetch(100); - DataSet d2 = fetcher.next(); - - INDArray input = d2.getFeatureMatrix(); - int numParams = conf.getLayer().initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - AutoEncoder da = (AutoEncoder) conf.getLayer().instantiate(conf, null, 0, params, true); - da.setListeners(new ScoreIterationListener(1), new HistogramIterationListener(5)); - da.setParams(da.params()); - da.fit(input); - } - - @Test - public void renderHistogram2() throws Exception { - MnistDataFetcher fetcher = new MnistDataFetcher(true); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().momentum(0.9f) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1000) - .learningRate(1e-1f).list() - .layer(0, new 
org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(784).nOut(100) - .weightInit(WeightInit.XAVIER).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder() - .lossFunction(LossFunctions.LossFunction.MCXENT).nIn(100).nOut(10).build()) - .pretrain(false).backprop(true).build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - net.setListeners(Arrays.asList(new ScoreIterationListener(1), - new HistogramIterationListener(1, true))); - - fetcher.fetch(100); - DataSet d2 = fetcher.next(); - net.fit(d2); - } - - @Test - public void testHistogramComputationGraph() throws Exception { - ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .graphBuilder().addInputs("input") - .addLayer("cnn1", - new ConvolutionLayer.Builder(2, 2).stride(2, 2).nIn(1).nOut(3) - .build(), - "input") - .addLayer("cnn2", - new ConvolutionLayer.Builder(4, 4).stride(2, 2).padding(1, 1) - .nIn(1).nOut(3).build(), - "input") - .addLayer("max1", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) - .kernelSize(2, 2).build(), - "cnn1", "cnn2") - .addLayer("output", new OutputLayer.Builder().nIn(7 * 7 * 6).nOut(10).build(), - "max1") - .setOutputs("output") - .inputPreProcessor("cnn1", new FeedForwardToCnnPreProcessor(28, 28, 1)) - .inputPreProcessor("cnn2", new FeedForwardToCnnPreProcessor(28, 28, 1)) - .inputPreProcessor("output", new CnnToFeedForwardPreProcessor(7, 7, 6)) - .pretrain(false).backprop(true).build(); - - ComputationGraph graph = new ComputationGraph(conf); - graph.init(); - - graph.setListeners(new HistogramIterationListener(1), new ScoreIterationListener(1)); - - DataSetIterator mnist = new MnistDataSetIterator(32, 640, false, true, false, 12345); - - graph.fit(mnist); - } - - @Test - public void testHistogramComputationGraphUnderscoresInName() throws Exception { - ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .graphBuilder().addInputs("input") - .setInputTypes(InputType.convolutional(1, 28, 28)) - .addLayer("cnn_1", - new ConvolutionLayer.Builder(2, 2).stride(2, 2).nIn(1).nOut(3) - .build(), - "input") - .addLayer("cnn_2", - new ConvolutionLayer.Builder(4, 4).stride(2, 2).padding(1, 1) - .nIn(1).nOut(3).build(), - "input") - .addLayer("max_1", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) - .kernelSize(2, 2).build(), - "cnn_1", "cnn_2") - .addLayer("output", new OutputLayer.Builder().nIn(7 * 7 * 6).nOut(10).build(), - "max_1") - .setOutputs("output").pretrain(false).backprop(true).build(); - - ComputationGraph graph = new ComputationGraph(conf); - graph.init(); - - graph.setListeners(new HistogramIterationListener(1), new ScoreIterationListener(1)); - - DataSetIterator mnist = new MnistDataSetIterator(32, 640, false, true, false, 12345); - - graph.fit(mnist); - } - -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/TestSerialization.java b/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/TestSerialization.java deleted file mode 100644 index ed5dfc049eac..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/TestSerialization.java +++ /dev/null @@ -1,67 +0,0 @@ -package org.deeplearning4j.ui; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.module.SimpleModule; 
-import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.layers.feedforward.autoencoder.AutoEncoder; -import org.deeplearning4j.optimize.listeners.ScoreIterationListener; -import org.deeplearning4j.ui.weights.HistogramIterationListener; -import org.deeplearning4j.ui.weights.ModelAndGradient; -import org.junit.Test; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.dataset.DataSet; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.nd4j.shade.serde.jackson.ndarray.NDArrayDeSerializer; -import org.nd4j.shade.serde.jackson.ndarray.NDArraySerializer; - -import java.util.Arrays; - -import static org.junit.Assert.assertEquals; - -/** - * @author Adam Gibson - */ -public class TestSerialization { - @Test - public void testModelSerde() throws Exception { - ObjectMapper mapper = getMapper(); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().momentum(0.9f) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1000) - .learningRate(1e-1f) - .layer(new org.deeplearning4j.nn.conf.layers.AutoEncoder.Builder().nIn(4).nOut(3) - .corruptionLevel(0.6).sparsity(0.5) - .lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY).build()) - .build(); - - - DataSet d2 = new IrisDataSetIterator(150, 150).next(); - - INDArray input = d2.getFeatureMatrix(); - int numParams = conf.getLayer().initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - AutoEncoder da = (AutoEncoder) conf.getLayer().instantiate(conf, - Arrays.asList(new ScoreIterationListener(1), new HistogramIterationListener(1)), 0, params, - true); - da.setInput(input); - da.setBackpropGradientsViewArray(Nd4j.create(1, numParams)); - ModelAndGradient g = new ModelAndGradient(da); - String json = mapper.writeValueAsString(g); - ModelAndGradient read = mapper.readValue(json, ModelAndGradient.class); - assertEquals(g, read); - } - - - public ObjectMapper getMapper() { - ObjectMapper mapper = new ObjectMapper(); - SimpleModule nd4j = new SimpleModule("nd4j"); - nd4j.addDeserializer(INDArray.class, new NDArrayDeSerializer()); - nd4j.addSerializer(INDArray.class, new NDArraySerializer()); - mapper.registerModule(nd4j); - return mapper; - } - - -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/flow/FlowIterationListenerTest.java b/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/flow/FlowIterationListenerTest.java deleted file mode 100644 index f106b5f1c4cf..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/flow/FlowIterationListenerTest.java +++ /dev/null @@ -1,206 +0,0 @@ -package org.deeplearning4j.ui.flow; - -import org.datavec.image.loader.LFWLoader; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.*; -import org.deeplearning4j.nn.conf.graph.PreprocessorVertex; -import org.deeplearning4j.nn.conf.graph.rnn.DuplicateToTimeSeriesVertex; -import org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.*; -import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; -import org.deeplearning4j.nn.graph.ComputationGraph; -import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; 
-import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.ui.flow.beans.LayerInfo; -import org.deeplearning4j.ui.flow.beans.ModelInfo; -import org.junit.Before; -import org.junit.Test; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.dataset.DataSet; -import org.nd4j.linalg.dataset.SplitTestAndTrain; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.ArrayList; -import java.util.List; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; - -/** - * This set of tests addresses different stages of model state serialization for later visualization - * - * @author raver119@gmail.com - */ -public class FlowIterationListenerTest { - private static ComputationGraph graph; - private static MultiLayerNetwork network; - - private static Logger log = LoggerFactory.getLogger(FlowIterationListenerTest.class); - - @Before - public void setUp() throws Exception { - if (graph == null) { - int VOCAB_SIZE = 1000; - ComputationGraphConfiguration configuration = new NeuralNetConfiguration.Builder().regularization(true) - .l2(0.0001).weightInit(WeightInit.XAVIER).learningRate(0.01).updater(Updater.RMSPROP) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .graphBuilder().addInputs("inEn", "inFr") - .setInputTypes(InputType.recurrent(VOCAB_SIZE + 1), InputType.recurrent(VOCAB_SIZE + 1)) - .addLayer("embeddingEn", - new EmbeddingLayer.Builder().nIn(VOCAB_SIZE + 1).nOut(128) - .activation(Activation.IDENTITY).build(), - "inEn") - .addLayer("encoder", - new GravesLSTM.Builder().nIn(128).nOut(256).activation(Activation.SOFTSIGN) - .build(), - "embeddingEn") - .addVertex("lastTimeStep", new LastTimeStepVertex("inEn"), "encoder") - .addVertex("duplicateTimeStep", new DuplicateToTimeSeriesVertex("inFr"), "lastTimeStep") - .addLayer("embeddingFr", - new EmbeddingLayer.Builder().nIn(VOCAB_SIZE + 1).nOut(128) - .activation(Activation.IDENTITY).build(), - "inFr") - .addVertex("embeddingFrSeq", new PreprocessorVertex(new FeedForwardToRnnPreProcessor()), - "embeddingFr") - .addLayer("decoder", - new GravesLSTM.Builder().nIn(128 + 256).nOut(256) - .activation(Activation.SOFTSIGN).build(), - "embeddingFrSeq", "duplicateTimeStep") - .addLayer("output", - new RnnOutputLayer.Builder().nIn(256).nOut(VOCAB_SIZE + 1) - .activation(Activation.SOFTMAX).build(), - "decoder") - .setOutputs("output").pretrain(false).backprop(true).build(); - - graph = new ComputationGraph(configuration); - graph.init(); - - INDArray input = Nd4j.zeros(10, VOCAB_SIZE, 20); - graph.setInputs(input, input); - } - - if (network == null) { - final int numRows = 40; - final int numColumns = 40; - int nChannels = 3; - int outputNum = LFWLoader.NUM_LABELS; - int numSamples = LFWLoader.NUM_IMAGES; - boolean useSubset = false; - int batchSize = 200;// numSamples/10; - int iterations = 5; - int splitTrainNum = (int) (batchSize * .8); - int seed = 123; - int listenerFreq = iterations / 5; - DataSet lfwNext; - SplitTestAndTrain trainTest; - DataSet trainInput; - List testInput = new ArrayList<>(); - List testLabels = new ArrayList<>(); - - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) - .iterations(iterations).activation(Activation.RELU).weightInit(WeightInit.XAVIER) - .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) - 
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.01) - .momentum(0.9).regularization(true).updater(Updater.ADAGRAD).useDropConnect(true).list() - .layer(0, new ConvolutionLayer.Builder(4, 4).name("cnn1").nIn(nChannels).stride(1, 1) - .nOut(20).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) - .name("pool1").build()) - .layer(2, new ConvolutionLayer.Builder(3, 3).name("cnn2").stride(1, 1).nOut(40).build()) - .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) - .name("pool2").build()) - .layer(4, new ConvolutionLayer.Builder(3, 3).name("cnn3").stride(1, 1).nOut(60).build()) - .layer(5, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) - .name("pool3").build()) - .layer(6, new ConvolutionLayer.Builder(2, 2).name("cnn4").stride(1, 1).nOut(80).build()) - .layer(7, new DenseLayer.Builder().name("ffn1").nOut(160).dropOut(0.5).build()) - .layer(8, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nOut(outputNum).activation(Activation.SOFTMAX).build()) - .backprop(true).pretrain(false) - .setInputType(InputType.convolutional(numRows, numColumns, nChannels)); - - network = new MultiLayerNetwork(builder.build()); - network.init(); - - INDArray input = Nd4j.zeros(10, nChannels, numRows, numColumns); - network.setInput(input); - } - } - - @Test - public void testMLNModelInfo1() throws Exception { - FlowIterationListener listener = new FlowIterationListener(1); - - ModelInfo info = listener.buildModelInfo(network); - - for (LayerInfo layerInfo : info.getLayers()) { - log.info("Layer: " + layerInfo); - } - - // checking total number of layers - count now includes input as 0 layer in buildModelInfo - assertEquals(9, info.size() - 1); - - // checking, if all named layers exist - assertNotEquals(null, info.getLayerInfoByName("cnn1")); - assertNotEquals(null, info.getLayerInfoByName("cnn2")); - assertNotEquals(null, info.getLayerInfoByName("cnn3")); - assertNotEquals(null, info.getLayerInfoByName("cnn4")); - assertNotEquals(null, info.getLayerInfoByName("pool1")); - assertNotEquals(null, info.getLayerInfoByName("pool2")); - assertNotEquals(null, info.getLayerInfoByName("pool3")); - assertNotEquals(null, info.getLayerInfoByName("ffn1")); - - // checking if output layer has no outgoing connections - assertEquals(0, info.getLayerInfoByCoords(0, 9).getConnections().size()); - - // check description for cnn - assertNotEquals(null, info.getLayerInfoByName("cnn1").getDescription().getMainLine()); - } - - @Test - public void testCGModelInfo1() throws Exception { - FlowIterationListener listener = new FlowIterationListener(1); - - ModelInfo info = listener.buildModelInfo(graph); - for (LayerInfo layerInfo : info.getLayers()) { - log.info("Layer: " + layerInfo); - } - - // checking total number of layers - assertEquals(11, info.size()); - - // checking, if all named layers exist - assertNotEquals(null, info.getLayerInfoByName("inEn")); - assertNotEquals(null, info.getLayerInfoByName("inFr")); - assertNotEquals(null, info.getLayerInfoByName("embeddingEn")); - assertNotEquals(null, info.getLayerInfoByName("embeddingFr")); - assertNotEquals(null, info.getLayerInfoByName("encoder")); - assertNotEquals(null, info.getLayerInfoByName("embeddingFrSeq")); - assertNotEquals(null, info.getLayerInfoByName("lastTimeStep")); - assertNotEquals(null, info.getLayerInfoByName("duplicateTimeStep")); - assertNotEquals(null, 
info.getLayerInfoByName("decoder")); - assertNotEquals(null, info.getLayerInfoByName("output")); - - - // check that these two layers connect to the same node - LayerInfo info1 = info.getLayerInfoByName("duplicateTimeStep"); - LayerInfo info2 = info.getLayerInfoByName("embeddingFrSeq"); - - LayerInfo decoder = info.getLayerInfoByName("decoder-merge"); - - assertEquals(decoder.getX(), info2.getConnections().get(0).getX()); - assertEquals(decoder.getY(), info2.getConnections().get(0).getY()); - - assertEquals(decoder.getX(), info1.getConnections().get(0).getX()); - assertEquals(decoder.getY(), info1.getConnections().get(0).getY()); - - - - assertEquals(info1.getConnections().get(0), info2.getConnections().get(0)); - } -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/flow/TestFlowListener.java b/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/flow/TestFlowListener.java deleted file mode 100644 index e7ccaee3a860..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/flow/TestFlowListener.java +++ /dev/null @@ -1,114 +0,0 @@ -package org.deeplearning4j.ui.flow; - -import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; -import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.conf.layers.OutputLayer; -import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; -import org.deeplearning4j.nn.graph.ComputationGraph; -import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.optimize.listeners.ScoreIterationListener; -import org.junit.Ignore; -import org.junit.Test; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -import org.nd4j.linalg.lossfunctions.LossFunctions; - -/** - * Created by Alex on 08/10/2016. - */ -@Ignore -public class TestFlowListener { - - @Test - public void testUI() throws Exception { - - int nChannels = 1; // Number of input channels - int outputNum = 10; // The number of possible outcomes - int batchSize = 64; // Test batch size - - DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, true, 12345); - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).iterations(1) // Training iterations as above - .regularization(true).l2(0.0005).learningRate(0.01).weightInit(WeightInit.XAVIER) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NESTEROVS) - .momentum(0.9).list() - .layer(0, new ConvolutionLayer.Builder(5, 5) - //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied - .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) - .stride(2, 2).build()) - .layer(2, new ConvolutionLayer.Builder(5, 5) - //Note that nIn need not be specified in later layers - .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build()) - .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) - .stride(2, 2).build()) - .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nOut(outputNum).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)) //See note below - .backprop(true).pretrain(false).build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - net.setListeners(new FlowIterationListener(1), new ScoreIterationListener(1)); - - for (int i = 0; i < 50; i++) { - net.fit(mnistTrain.next()); - Thread.sleep(1000); - } - - - Thread.sleep(100000); - } - - @Test - public void testUICG() throws Exception { - - int nChannels = 1; // Number of input channels - int outputNum = 10; // The number of possible outcomes - int batchSize = 64; // Test batch size - - DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, true, 12345); - - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).iterations(1) // Training iterations as above - .regularization(true).l2(0.0005).learningRate(0.01).weightInit(WeightInit.XAVIER) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NESTEROVS) - .momentum(0.9).graphBuilder().addInputs("in") - .addLayer("0", new ConvolutionLayer.Builder(5, 5) - //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied - .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build(), - "in") - .addLayer("1", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) - .stride(2, 2).build(), "0") - .addLayer("2", new ConvolutionLayer.Builder(5, 5) - //Note that nIn need not be specified in later layers - .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build(), "1") - .addLayer("3", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) - .stride(2, 2).build(), "2") - .addLayer("4", new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build(), "3") - .addLayer("5", new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nOut(outputNum).activation(Activation.SOFTMAX).build(), "4") - .setOutputs("5").setInputTypes(InputType.convolutionalFlat(28, 28, 1)).backprop(true) - .pretrain(false).build(); - - ComputationGraph net = new ComputationGraph(conf); - net.init(); - net.setListeners(new FlowIterationListener(1), new ScoreIterationListener(1)); - - for (int i = 0; i < 50; i++) { - net.fit(mnistTrain.next()); - Thread.sleep(1000); - } - - - Thread.sleep(100000); - } -} diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java b/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java index 82bcfa944679..9a01dfb238ba 100644 --- a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java +++ b/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java @@ -1,10 +1,8 @@ package org.deeplearning4j.ui.weights; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -17,6 +15,7 @@ import org.junit.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; /** @@ -35,9 +34,8 @@ public void testUI() throws Exception { DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, true, 12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).iterations(1) // Training iterations as above - .regularization(true).l2(0.0005).learningRate(0.01).weightInit(WeightInit.XAVIER) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.NESTEROVS) - .momentum(0.9).list() + .l2(0.0005).weightInit(WeightInit.XAVIER) + .updater(new Nesterovs(0.01, 0.9)).list() .layer(0, new ConvolutionLayer.Builder(5, 5) //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) diff --git a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/weights/TestHistogramListener.java b/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/weights/TestHistogramListener.java deleted file mode 100644 index c60a3073b950..000000000000 --- a/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/java/org/deeplearning4j/ui/weights/TestHistogramListener.java +++ /dev/null @@ -1,49 +0,0 @@ -package org.deeplearning4j.ui.weights; - -import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.conf.layers.OutputLayer; -import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.optimize.listeners.ScoreIterationListener; -import org.junit.Ignore; -import org.junit.Test; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -import org.nd4j.linalg.lossfunctions.LossFunctions; - -/** - * Created by Alex on 08/10/2016. - */ -@Ignore -public class TestHistogramListener { - - @Test - public void testUI() throws Exception { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).list() - .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(4).build()) - .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(4).nOut(3).build()) - .pretrain(false).backprop(true).build(); - - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - net.setListeners(new HistogramIterationListener(1), new ScoreIterationListener(1)); - - DataSetIterator iter = new IrisDataSetIterator(150, 150); - - for (int i = 0; i < 100; i++) { - net.fit(iter); - Thread.sleep(1000); - } - - - - Thread.sleep(100000); - } - -} diff --git a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java index 030e494f3734..e712650d2d06 100644 --- a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java +++ b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java @@ -18,6 +18,7 @@ import org.deeplearning4j.zoo.ZooModel; import org.deeplearning4j.zoo.ZooType; import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; @@ -87,7 +88,8 @@ public MultiLayerConfiguration conf() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0.0, 0.01)) .activation(Activation.RELU).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(Updater.NESTEROVS).learningRate(1e-2).biasLearningRate(1e-2 * 2).regularization(true) + .updater(new Nesterovs(1e-2, 0.9)) + .biasUpdater(new Nesterovs(2e-2, 0.9)) .convolutionMode(ConvolutionMode.Same) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) // normalize to prevent vanishing or exploding 
gradients .trainingWorkspaceMode(WorkspaceMode.SINGLE).inferenceWorkspaceMode(WorkspaceMode.SINGLE) diff --git a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java index 1f142fae7d2b..1fd7916f4151 100644 --- a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java +++ b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java @@ -76,8 +76,8 @@ public ComputationGraphConfiguration conf() { ComputationGraphConfiguration.GraphBuilder graph = new NeuralNetConfiguration.Builder().seed(seed) .iterations(iterations).activation(Activation.IDENTITY) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(new Adam(0.1, 0.9, 0.999, 0.01)).weightInit(WeightInit.RELU).regularization(true) - .l2(5e-5).learningRate(0.1).miniBatch(true).convolutionMode(ConvolutionMode.Same) + .updater(new Adam(0.1, 0.9, 0.999, 0.01)).weightInit(WeightInit.RELU) + .l2(5e-5).miniBatch(true).convolutionMode(ConvolutionMode.Same) .graphBuilder(); diff --git a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/GoogLeNet.java b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/GoogLeNet.java index 96bdd90228c7..7c8f885ba866 100644 --- a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/GoogLeNet.java +++ b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/GoogLeNet.java @@ -5,7 +5,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration.GraphBuilder; -import org.deeplearning4j.nn.conf.LearningRatePolicy; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.conf.graph.MergeVertex; @@ -19,6 +18,8 @@ import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.schedule.ScheduleType; +import org.nd4j.linalg.schedule.StepSchedule; /** * GoogleLeNet @@ -137,9 +138,9 @@ private GraphBuilder inception(GraphBuilder graph, String name, int inputSize, i public ComputationGraphConfiguration conf() { GraphBuilder graph = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) .activation(Activation.RELU).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .learningRate(1e-2).biasLearningRate(2 * 1e-2).learningRateDecayPolicy(LearningRatePolicy.Step) - .lrPolicyDecayRate(0.96).lrPolicySteps(320000).updater(new Nesterovs(1e-2, 0.9)) - .weightInit(WeightInit.XAVIER).regularization(true).l2(2e-4).graphBuilder(); + .updater(new Nesterovs(new StepSchedule(ScheduleType.ITERATION, 1e-2, 0.96, 320000), 0.9)) + .biasUpdater(new Nesterovs(new StepSchedule(ScheduleType.ITERATION, 2e-2, 0.96, 320000), 0.9)) + .weightInit(WeightInit.XAVIER).l2(2e-4).graphBuilder(); graph.addInputs("input").addLayer("cnn1", conv7x7(inputShape[0], 64, 0.2), "input") .addLayer("max1", diff --git a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java index 01e1419d9d1a..dde87ca7fa55 100644 --- a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java +++ 
b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java @@ -106,7 +106,7 @@ public ComputationGraphConfiguration.GraphBuilder graphBuilder(String input) { .iterations(iterations).activation(Activation.RELU) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new RmsProp(0.1, 0.96, 0.001)).weightInit(WeightInit.DISTRIBUTION) - .dist(new NormalDistribution(0.0, 0.5)).regularization(true).l2(5e-5).miniBatch(true) + .dist(new NormalDistribution(0.0, 0.5)).l2(5e-5).miniBatch(true) .convolutionMode(ConvolutionMode.Truncate).graphBuilder(); diff --git a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/LeNet.java b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/LeNet.java index 0d0bda6ecf41..d05f18b0f0de 100644 --- a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/LeNet.java +++ b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/LeNet.java @@ -85,7 +85,7 @@ public MultiLayerConfiguration conf() { .inferenceWorkspaceMode(workspaceMode).seed(seed).iterations(iterations) .activation(Activation.IDENTITY).weightInit(WeightInit.XAVIER) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new AdaDelta()) - .regularization(false).convolutionMode(ConvolutionMode.Same).list() + .convolutionMode(ConvolutionMode.Same).list() // block 1 .layer(0, new ConvolutionLayer.Builder(new int[] {5, 5}, new int[] {1, 1}).name("cnn1") .nIn(inputShape[0]).nOut(20).activation(Activation.RELU).build()) diff --git a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java index 3a49886c93d9..493da004e4e2 100644 --- a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java +++ b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java @@ -176,7 +176,7 @@ public ComputationGraphConfiguration.GraphBuilder graphBuilder() { .iterations(iterations).activation(Activation.IDENTITY) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new RmsProp(0.1, 0.96, 0.001)).weightInit(WeightInit.DISTRIBUTION) - .dist(new NormalDistribution(0.0, 0.5)).regularization(true).l1(1e-7).l2(5e-5).miniBatch(true) + .dist(new NormalDistribution(0.0, 0.5)).l1(1e-7).l2(5e-5).miniBatch(true) .convolutionMode(ConvolutionMode.Truncate).graphBuilder(); diff --git a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java index 4ad78661df2e..16781b2d764a 100644 --- a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java +++ b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java @@ -73,7 +73,7 @@ public MultiLayerConfiguration conf() { .inferenceWorkspaceMode(workspaceMode).seed(seed).iterations(iterations) .activation(Activation.IDENTITY).weightInit(WeightInit.RELU) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(new AdaDelta()).regularization(false) + .updater(new AdaDelta()) .convolutionMode(ConvolutionMode.Same).list() // block 1 .layer(0, new ConvolutionLayer.Builder(new int[] {7, 7}).name("image_array") diff --git a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java index 8f2b3ec3a742..51dbdd78742a 100644 --- 
a/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java +++ b/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java @@ -78,8 +78,8 @@ public Class modelType() { public MultiLayerConfiguration conf() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) - .learningRate(0.01).seed(12345).regularization(true).l2(0.001).weightInit(WeightInit.XAVIER) - .updater(new RmsProp()).list() + .seed(12345).l2(0.001).weightInit(WeightInit.XAVIER) + .updater(new RmsProp(0.01)).list() .layer(0, new GravesLSTM.Builder().nIn(inputShape[1]).nOut(256).activation(Activation.TANH) .build()) .layer(1, new GravesLSTM.Builder().nOut(256).activation(Activation.TANH).build())
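The zoo-model hunks above all follow the same configuration migration: learning rate, momentum, and decay schedules move off the NeuralNetConfiguration.Builder (learningRate, momentum, learningRateDecayPolicy, biasLearningRate, regularization(true)) and onto the updater instances themselves, with biasUpdater carrying the separate bias rate. A condensed sketch of the new style, using only the updater and schedule classes that appear in this diff (layer sizes and seed are placeholders):

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.schedule.ScheduleType;
import org.nd4j.linalg.schedule.StepSchedule;

public class UpdaterMigrationSketch {

    public static MultiLayerConfiguration conf() {
        // Learning rate 1e-2 decayed by 0.96 every 320000 iterations, momentum 0.9 - the GoogLeNet schedule above.
        StepSchedule lrSchedule = new StepSchedule(ScheduleType.ITERATION, 1e-2, 0.96, 320000);

        return new NeuralNetConfiguration.Builder()
                .seed(12345)
                .weightInit(WeightInit.XAVIER)
                // Rate, momentum and schedule now live on the updater; no learningRate()/momentum() on the builder.
                .updater(new Nesterovs(lrSchedule, 0.9))
                // biasUpdater replaces the old biasLearningRate(...) call, here with twice the base rate.
                .biasUpdater(new Nesterovs(new StepSchedule(ScheduleType.ITERATION, 2e-2, 0.96, 320000), 0.9))
                // l2 applies directly; the separate regularization(true) switch is gone.
                .l2(2e-4)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(500).activation(Activation.RELU).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                        .nIn(500).nOut(10).activation(Activation.SOFTMAX).build())
                .build();
    }
}

Fixed-rate updaters follow the same shape without a schedule, e.g. new Nesterovs(0.01, 0.9), new AdaGrad(0.01), or new RmsProp(0.01) as used in the test and TextGenerationLSTM hunks above.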