Example usage for org.deeplearning4j.nn.conf GradientNormalization RenormalizeL2PerLayer

List of usage examples for org.deeplearning4j.nn.conf GradientNormalization RenormalizeL2PerLayer

Introduction

On this page you can find example usage for org.deeplearning4j.nn.conf GradientNormalization RenormalizeL2PerLayer.

Prototype

GradientNormalization RenormalizeL2PerLayer

To view the source code for org.deeplearning4j.nn.conf GradientNormalization RenormalizeL2PerLayer, click the Source Link below.

Click Source Link

Usage

From source file:com.example.android.displayingbitmaps.ui.ImageGridActivity.java

License:Apache License

/**
 * Trains a two-layer MLP (1000-unit ReLU hidden layer + softmax output) on a
 * 10,000-sample subset of MNIST, then evaluates it on the 20% held-out split
 * of each mini-batch and logs the evaluation statistics.
 *
 * @throws Exception if the MNIST data set cannot be downloaded or read
 */
public void trainMLP() throws Exception {
    Nd4j.ENFORCE_NUMERICAL_STABILITY = true;
    final int numRows = 28;
    final int numColumns = 28;
    int outputNum = 10;
    int numSamples = 10000;
    int batchSize = 500;
    int iterations = 10;
    int seed = 123;
    int listenerFreq = iterations / 5;
    // 80/20 train/test split within each mini-batch.
    int splitTrainNum = (int) (batchSize * .8);
    DataSet mnist;
    SplitTestAndTrain trainTest;
    DataSet trainInput;
    List<INDArray> testInput = new ArrayList<>();
    List<INDArray> testLabels = new ArrayList<>();

    log.info("Load data....");
    DataSetIterator mnistIter = new MnistDataSetIterator(batchSize, numSamples, true);

    log.info("Build model....");
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(iterations)
            // Renormalize per-layer gradients to unit L2 norm to avoid exploding gradients.
            .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).learningRate(1e-1f)
            // Momentum warm-up: 0.5 for the first 3 iterations, then 0.9.
            .momentum(0.5).momentumAfter(Collections.singletonMap(3, 0.9)).useDropConnect(true).list(2)
            .layer(0,
                    new DenseLayer.Builder().nIn(numRows * numColumns).nOut(1000).activation("relu")
                            .weightInit(WeightInit.XAVIER).build())
            .layer(1, new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD).nIn(1000).nOut(outputNum)
                    .activation("softmax").weightInit(WeightInit.XAVIER).build())
            .build();

    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    // Register the score listener once (the original code registered it twice).
    model.setListeners(Arrays.asList((IterationListener) new ScoreIterationListener(listenerFreq)));

    log.info("Train model....");
    while (mnistIter.hasNext()) {
        mnist = mnistIter.next();
        // Split each batch: first 80% used for fitting, last 20% kept for evaluation.
        trainTest = mnist.splitTestAndTrain(splitTrainNum, new Random(seed));
        trainInput = trainTest.getTrain();
        testInput.add(trainTest.getTest().getFeatureMatrix());
        testLabels.add(trainTest.getTest().getLabels());
        model.fit(trainInput);
    }

    log.info("Evaluate model....");
    Evaluation eval = new Evaluation(outputNum);
    for (int i = 0; i < testInput.size(); i++) {
        INDArray output = model.output(testInput.get(i));
        eval.eval(testLabels.get(i), output);
    }

    log.info(eval.stats());
    log.info("****************Example finished********************");
}

From source file:org.eigengo.rsa.identity.v100.AlexNet.java

License:Open Source License

/**
 * Builds an AlexNet-style network configuration: five convolution layers with
 * interleaved local response normalization and max pooling, followed by two
 * 4096-unit dropout dense layers and a softmax output over {@code numLabels}
 * classes. Gradients are renormalized to unit L2 norm per layer.
 *
 * @return the fully built {@link MultiLayerConfiguration}
 */
public MultiLayerConfiguration conf() {
    double nonZeroBias = 1;
    double dropOut = 0.5;
    SubsamplingLayer.PoolingType poolingType = SubsamplingLayer.PoolingType.MAX;

    // TODO split and link kernel maps on GPUs - 2nd, 4th, 5th convolution should only connect maps on the same gpu, 3rd connects to all in 2nd

    // Each layer is built up-front so the configuration chain below stays readable.
    ConvolutionLayer cnn1 = new ConvolutionLayer.Builder(new int[] { 11, 11 }, new int[] { 4, 4 },
            new int[] { 3, 3 }).name("cnn1").nIn(channels).nOut(96).build();
    LocalResponseNormalization lrn1 = new LocalResponseNormalization.Builder().name("lrn1").build();
    SubsamplingLayer maxpool1 = new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 },
            new int[] { 2, 2 }).name("maxpool1").build();
    ConvolutionLayer cnn2 = new ConvolutionLayer.Builder(new int[] { 5, 5 }, new int[] { 1, 1 },
            new int[] { 2, 2 }).name("cnn2").nOut(256).biasInit(nonZeroBias).build();
    LocalResponseNormalization lrn2 = new LocalResponseNormalization.Builder().name("lrn2").k(2).n(5)
            .alpha(1e-4).beta(0.75).build();
    SubsamplingLayer maxpool2 = new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 },
            new int[] { 2, 2 }).name("maxpool2").build();
    ConvolutionLayer cnn3 = new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 },
            new int[] { 1, 1 }).name("cnn3").nOut(384).build();
    ConvolutionLayer cnn4 = new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 },
            new int[] { 1, 1 }).name("cnn4").nOut(384).biasInit(nonZeroBias).build();
    ConvolutionLayer cnn5 = new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 },
            new int[] { 1, 1 }).name("cnn5").nOut(256).biasInit(nonZeroBias).build();
    SubsamplingLayer maxpool3 = new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 },
            new int[] { 2, 2 }).name("maxpool3").build();
    DenseLayer ffn1 = new DenseLayer.Builder().name("ffn1").nOut(4096)
            .dist(new GaussianDistribution(0, 0.005)).biasInit(nonZeroBias).dropOut(dropOut).build();
    DenseLayer ffn2 = new DenseLayer.Builder().name("ffn2").nOut(4096)
            .dist(new GaussianDistribution(0, 0.005)).biasInit(nonZeroBias).dropOut(dropOut).build();
    OutputLayer output = new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
            .name("output").nOut(numLabels).activation("softmax").build();

    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed)
            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0.0, 0.01)).activation("relu")
            .updater(Updater.NESTEROVS).iterations(iterations)
            // normalize to prevent vanishing or exploding gradients
            .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1e-2)
            .biasLearningRate(1e-2 * 2).learningRateDecayPolicy(LearningRatePolicy.Step).lrPolicyDecayRate(0.1)
            .lrPolicySteps(100000).regularization(true).l2(5 * 1e-4).momentum(0.9).miniBatch(false).list()
            .layer(0, cnn1).layer(1, lrn1).layer(2, maxpool1)
            .layer(3, cnn2).layer(4, lrn2).layer(5, maxpool2)
            .layer(6, cnn3).layer(7, cnn4).layer(8, cnn5).layer(9, maxpool3)
            .layer(10, ffn1).layer(11, ffn2).layer(12, output)
            .backprop(true).pretrain(false).cnnInputSize(height, width, channels);

    return builder.build();
}

From source file:org.eigengo.rsa.identity.v100.DeepFaceVariant.java

License:Open Source License

/**
 * Builds and initializes a small DeepFace-style convolutional network: four
 * convolution layers (three followed by 2x2 max pooling), one 160-unit
 * dropout dense layer, and a softmax output over {@code numLabels} classes.
 * Per-layer L2 gradient renormalization and ADAGRAD updates are used.
 *
 * @return an initialized {@link MultiLayerNetwork} ready for training
 */
public MultiLayerNetwork init() {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations)
            .activation("relu").weightInit(WeightInit.XAVIER)
            // Renormalize per-layer gradients to unit L2 norm to stabilize training.
            .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(0.01)
            .momentum(0.9).regularization(true).l2(1e-3).updater(Updater.ADAGRAD).useDropConnect(true).list()
            .layer(0,
                    new ConvolutionLayer.Builder(4, 4).name("cnn1").nIn(channels).stride(1, 1).nOut(20).build())
            .layer(1,
                    new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 })
                            .name("pool1").build())
            .layer(2, new ConvolutionLayer.Builder(3, 3).name("cnn2").stride(1, 1).nOut(40).build())
            .layer(3,
                    new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 })
                            .name("pool2").build())
            .layer(4, new ConvolutionLayer.Builder(3, 3).name("cnn3").stride(1, 1).nOut(60).build())
            .layer(5,
                    new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 })
                            .name("pool3").build())
            // Renamed from the copy-pasted "cnn3" so each layer has a unique name.
            .layer(6, new ConvolutionLayer.Builder(2, 2).name("cnn4").stride(1, 1).nOut(80).build())
            .layer(7, new DenseLayer.Builder().name("ffn1").nOut(160).dropOut(0.5).build())
            .layer(8,
                    new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(numLabels)
                            .activation("softmax").build())
            .backprop(true).pretrain(false).cnnInputSize(height, width, channels).build();

    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();
    return network;
}

From source file:seqmodel.RNNModel.java

/**
 * Builds and initializes a single-layer GravesLSTM recurrent network with an
 * RNN output layer for binary sequence classification, trained with RMSPROP
 * and truncated BPTT. The commented-out alternatives record hyper-parameter
 * settings tried during experimentation.
 *
 * NOTE(review): the iterator parameter is never read inside this method —
 * confirm whether it is still needed in the signature.
 *
 * @param iter sentence iterator (currently unused in the method body)
 * @return an initialized {@link MultiLayerNetwork} with a score listener attached
 */
MultiLayerNetwork buildRNN(AMISentenceIterator iter) {

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(SEED).iterations(10)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            //.optimizationAlgo(OptimizationAlgorithm.LBFGS)
            //.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
            .updater(Updater.RMSPROP)
            //.updater(Updater.ADAGRAD)
            //.updater(Updater.SGD)
            //.regularization(true).l2(0.0001)
            //.regularization(true).l1(0.001)
            .weightInit(WeightInit.RELU)
            //.weightInit(WeightInit.UNIFORM)
            //.weightInit(WeightInit.XAVIER)
            //.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0)
            .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
            // NOTE(review): the threshold applies only to the Clip* normalization
            // strategies; RenormalizeL2PerLayer ignores it — confirm and consider removing.
            .gradientNormalizationThreshold(1.0).learningRate(0.001).list(2)
            .layer(0, new GravesLSTM.Builder().nIn(numInputDimensions).nOut(NUM_DIMENSIONS_LSTM)
                    //.activation("softsign").build())
                    // NOTE(review): softmax on an LSTM hidden layer is unusual —
                    // tanh/softsign is typical; verify this choice is intentional.
                    .activation("softmax")
                    //.activation("tanh")
                    .build())
            /*        
            .layer(1,
                    new GravesLSTM.Builder()
                    .nIn(25)
                    .nOut(NUM_DIMENSIONS_LSTM)
                    //.activation("softsign").build())
                    .activation("softmax")
                    //.activation("tanh")
                    .build())
            */
            .layer(1, new RnnOutputLayer.Builder().activation("softmax")
                    //.activation("tanh")
                    //.activation("sigmoid")
                    //.lossFunction(LossFunctions.LossFunction.MCXENT)
                    // NOTE(review): RMSE_XENT with a softmax output is an odd pairing —
                    // MCXENT is the conventional loss for softmax classification; confirm.
                    .lossFunction(LossFunctions.LossFunction.RMSE_XENT).nIn(NUM_DIMENSIONS_LSTM).nOut(2) // 2 for binary classification
                    .build())
            // Truncated BPTT bounds gradient propagation to TRUNCATED_BPP_LEN steps
            // in each direction, keeping memory use bounded on long sequences.
            .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(TRUNCATED_BPP_LEN)
            .tBPTTBackwardLength(TRUNCATED_BPP_LEN)
            //.backpropType(BackpropType.Standard)
            .pretrain(false).backprop(true).build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    // Log the score on every iteration.
    net.setListeners(new ScoreIterationListener(1));
    return net;
}