List of usage examples for org.deeplearning4j.nn.conf GradientNormalization RenormalizeL2PerLayer
GradientNormalization RenormalizeL2PerLayer
To view the full source code for org.deeplearning4j.nn.conf GradientNormalization RenormalizeL2PerLayer, click the Source link below each example.
From source file:com.example.android.displayingbitmaps.ui.ImageGridActivity.java
License:Apache License
public void trainMLP() throws Exception { Nd4j.ENFORCE_NUMERICAL_STABILITY = true; final int numRows = 28; final int numColumns = 28; int outputNum = 10; int numSamples = 10000; int batchSize = 500; int iterations = 10; int seed = 123; int listenerFreq = iterations / 5; int splitTrainNum = (int) (batchSize * .8); DataSet mnist;/*from www .j a v a 2 s . c o m*/ SplitTestAndTrain trainTest; DataSet trainInput; List<INDArray> testInput = new ArrayList<>(); List<INDArray> testLabels = new ArrayList<>(); log.info("Load data...."); DataSetIterator mnistIter = new MnistDataSetIterator(batchSize, numSamples, true); log.info("Build model...."); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(iterations) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).learningRate(1e-1f) .momentum(0.5).momentumAfter(Collections.singletonMap(3, 0.9)).useDropConnect(true).list(2) .layer(0, new DenseLayer.Builder().nIn(numRows * numColumns).nOut(1000).activation("relu") .weightInit(WeightInit.XAVIER).build()) .layer(1, new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD).nIn(1000).nOut(outputNum) .activation("softmax").weightInit(WeightInit.XAVIER).build()) .build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); model.setListeners(Arrays.asList((IterationListener) new ScoreIterationListener(listenerFreq))); log.info("Train model...."); model.setListeners(Arrays.asList((IterationListener) new ScoreIterationListener(listenerFreq))); while (mnistIter.hasNext()) { mnist = mnistIter.next(); trainTest = mnist.splitTestAndTrain(splitTrainNum, new Random(seed)); // train set that is the result trainInput = trainTest.getTrain(); // get feature matrix and labels for training testInput.add(trainTest.getTest().getFeatureMatrix()); testLabels.add(trainTest.getTest().getLabels()); model.fit(trainInput); } log.info("Evaluate model...."); Evaluation 
eval = new Evaluation(outputNum); for (int i = 0; i < testInput.size(); i++) { INDArray output = model.output(testInput.get(i)); eval.eval(testLabels.get(i), output); } log.info(eval.stats()); log.info("****************Example finished********************"); }
From source file:org.eigengo.rsa.identity.v100.AlexNet.java
License:Open Source License
public MultiLayerConfiguration conf() { double nonZeroBias = 1; double dropOut = 0.5; SubsamplingLayer.PoolingType poolingType = SubsamplingLayer.PoolingType.MAX; // TODO split and link kernel maps on GPUs - 2nd, 4th, 5th convolution should only connect maps on the same gpu, 3rd connects to all in 2nd MultiLayerConfiguration.Builder conf = new NeuralNetConfiguration.Builder().seed(seed) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0.0, 0.01)).activation("relu") .updater(Updater.NESTEROVS).iterations(iterations) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) // normalize to prevent vanishing or exploding gradients .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1e-2) .biasLearningRate(1e-2 * 2).learningRateDecayPolicy(LearningRatePolicy.Step).lrPolicyDecayRate(0.1) .lrPolicySteps(100000).regularization(true).l2(5 * 1e-4).momentum(0.9).miniBatch(false).list() .layer(0,//w w w . j a va 2 s . co m new ConvolutionLayer.Builder(new int[] { 11, 11 }, new int[] { 4, 4 }, new int[] { 3, 3 }) .name("cnn1").nIn(channels).nOut(96).build()) .layer(1, new LocalResponseNormalization.Builder().name("lrn1").build()) .layer(2, new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 }) .name("maxpool1").build()) .layer(3, new ConvolutionLayer.Builder(new int[] { 5, 5 }, new int[] { 1, 1 }, new int[] { 2, 2 }) .name("cnn2").nOut(256).biasInit(nonZeroBias).build()) .layer(4, new LocalResponseNormalization.Builder().name("lrn2").k(2).n(5).alpha(1e-4).beta(0.75) .build()) .layer(5, new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 }) .name("maxpool2").build()) .layer(6, new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 }) .name("cnn3").nOut(384).build()) .layer(7, new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 }) .name("cnn4").nOut(384).biasInit(nonZeroBias).build()) .layer(8, new 
ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 }) .name("cnn5").nOut(256).biasInit(nonZeroBias).build()) .layer(9, new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 }) .name("maxpool3").build()) .layer(10, new DenseLayer.Builder().name("ffn1").nOut(4096).dist(new GaussianDistribution(0, 0.005)) .biasInit(nonZeroBias).dropOut(dropOut).build()) .layer(11, new DenseLayer.Builder().name("ffn2").nOut(4096).dist(new GaussianDistribution(0, 0.005)) .biasInit(nonZeroBias).dropOut(dropOut).build()) .layer(12, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).name("output") .nOut(numLabels).activation("softmax").build()) .backprop(true).pretrain(false).cnnInputSize(height, width, channels); return conf.build(); }
From source file:org.eigengo.rsa.identity.v100.DeepFaceVariant.java
License:Open Source License
public MultiLayerNetwork init() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) .activation("relu").weightInit(WeightInit.XAVIER) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.01) .momentum(0.9).regularization(true).l2(1e-3).updater(Updater.ADAGRAD).useDropConnect(true).list() .layer(0,/* w w w . j av a2 s .c om*/ new ConvolutionLayer.Builder(4, 4).name("cnn1").nIn(channels).stride(1, 1).nOut(20).build()) .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 }) .name("pool1").build()) .layer(2, new ConvolutionLayer.Builder(3, 3).name("cnn2").stride(1, 1).nOut(40).build()) .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 }) .name("pool2").build()) .layer(4, new ConvolutionLayer.Builder(3, 3).name("cnn3").stride(1, 1).nOut(60).build()) .layer(5, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 }) .name("pool3").build()) .layer(6, new ConvolutionLayer.Builder(2, 2).name("cnn3").stride(1, 1).nOut(80).build()) .layer(7, new DenseLayer.Builder().name("ffn1").nOut(160).dropOut(0.5).build()) .layer(8, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(numLabels) .activation("softmax").build()) .backprop(true).pretrain(false).cnnInputSize(height, width, channels).build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); return network; }
From source file:seqmodel.RNNModel.java
/**
 * Builds a recurrent network for binary sequence classification: one
 * GravesLSTM layer feeding an RnnOutputLayer with two output units,
 * trained with SGD + RMSProp and truncated BPTT.
 *
 * <p>The commented-out builder calls record alternative hyperparameters
 * tried during experimentation (optimizers, updaters, regularization,
 * weight inits, activations, loss functions); they are kept deliberately.
 *
 * <p>NOTE(review): the LSTM layer and the output layer both use "softmax"
 * activation, and the output pairs softmax with RMSE_XENT loss — MCXENT is
 * the more conventional pairing (it appears commented out below). Confirm
 * this combination is intentional before reuse.
 *
 * <p>NOTE(review): the {@code iter} parameter is not referenced anywhere in
 * this method — confirm whether it is vestigial or used by an overload.
 *
 * @param iter sentence iterator (currently unused in this method body)
 * @return an initialized network with a score listener attached every iteration
 */
MultiLayerNetwork buildRNN(AMISentenceIterator iter) {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(SEED).iterations(10)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
//.optimizationAlgo(OptimizationAlgorithm.LBFGS)
//.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
.updater(Updater.RMSPROP)
//.updater(Updater.ADAGRAD)
//.updater(Updater.SGD)
//.regularization(true).l2(0.0001)
//.regularization(true).l1(0.001)
.weightInit(WeightInit.RELU)
//.weightInit(WeightInit.UNIFORM)
//.weightInit(WeightInit.XAVIER)
//.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0)
// Per-layer L2 renormalization; the threshold below applies to the clipping variant if re-enabled.
.gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
.gradientNormalizationThreshold(1.0).learningRate(0.001).list(2)
.layer(0, new GravesLSTM.Builder().nIn(numInputDimensions).nOut(NUM_DIMENSIONS_LSTM)
//.activation("softsign").build())
.activation("softmax")
//.activation("tanh")
.build())
/*
.layer(1,
new GravesLSTM.Builder()
.nIn(25)
.nOut(NUM_DIMENSIONS_LSTM)
//.activation("softsign").build())
.activation("softmax")
//.activation("tanh")
.build())
*/
.layer(1, new RnnOutputLayer.Builder().activation("softmax")
//.activation("tanh")
//.activation("sigmoid")
//.lossFunction(LossFunctions.LossFunction.MCXENT)
.lossFunction(LossFunctions.LossFunction.RMSE_XENT).nIn(NUM_DIMENSIONS_LSTM).nOut(2) // 2 for binary classification
.build())
// Truncated backpropagation through time, both directions limited to TRUNCATED_BPP_LEN steps.
.backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(TRUNCATED_BPP_LEN)
.tBPTTBackwardLength(TRUNCATED_BPP_LEN)
//.backpropType(BackpropType.Standard)
.pretrain(false).backprop(true).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
// Log the score on every iteration.
net.setListeners(new ScoreIterationListener(1));
return net;
}