Example usage for org.deeplearning4j.nn.conf BackpropType TruncatedBPTT

List of usage examples for org.deeplearning4j.nn.conf BackpropType TruncatedBPTT

Introduction

In this page you can find the example usage for org.deeplearning4j.nn.conf BackpropType TruncatedBPTT.

Prototype

BackpropType TruncatedBPTT

To view the source code for org.deeplearning4j.nn.conf BackpropType TruncatedBPTT, click the Source Link below.

Click Source Link

Document

Truncated BackPropagation Through Time.

Usage

From source file:org.ensor.fftmusings.autoencoder.RNNTrainer.java

/**
 * Trains a two-layer GravesLSTM network with a mixture-density output layer on
 * sequences produced by a previously trained stacked autoencoder ("stack.rnn").
 *
 * @param args optional; args[0] overrides the default learning rate (0.0001)
 * @throws Exception if model deserialization, training, or serialization fails
 */
public static void main(String[] args) throws Exception {

    MultiLayerNetwork stackedAutoencoder = ModelSerializer.restoreMultiLayerNetwork("stack.rnn");

    Random rng = new Random();

    RNNIterator iter = new RNNIterator(stackedAutoencoder, rng, 100, 100, System.out);

    // Optional command-line override of the default learning rate.
    double learningRate = 0.0001;
    if (args.length != 0) {
        learningRate = Double.parseDouble(args[0]);
    }

    int nGaussians = 8;
    int labelWidth = iter.totalOutcomes();
    int inputWidth = iter.inputColumns();
    int lstmLayerSize = 400;
    int tbpttLength = 50; // truncated-BPTT segment length (forward and backward)

    // NOTE(review): the cost function is sized by inputWidth rather than
    // labelWidth — presumably intentional for evaluateModel; confirm.
    LossMixtureDensity costFunction = LossMixtureDensity.builder().gaussians(nGaussians).labelWidth(inputWidth)
            .build();

    // Set up network configuration. The original chain called .iterations(1)
    // twice; the redundant second call has been removed.
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .learningRate(learningRate).rmsDecay(0.95).seed(12345).regularization(true).l2(0.001)
            .weightInit(WeightInit.XAVIER).list()
            .layer(0,
                    new GravesLSTM.Builder().nIn(inputWidth).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                            .activation(Activation.TANH).build())
            .layer(1,
                    new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                            .activation(Activation.TANH).build())
            .layer(2,
                    new MixtureDensityRNNOutputLayer.Builder().gaussians(nGaussians).nIn(lstmLayerSize)
                            .nOut(labelWidth).updater(Updater.RMSPROP).build())
            .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength).build();

    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    model.setListeners(new ScoreIterationListener(System.out));

    // Train, evaluate, and checkpoint the model once per epoch.
    for (int epoch = 0; epoch < 300; epoch++) {
        model.fit(iter);
        iter.reset();
        evaluateModel(model, costFunction, stackedAutoencoder, rng, epoch);
        ModelSerializer.writeModel(model, "stack-timeseries.rnn", true);
    }
}

From source file:org.ensor.fftmusings.autoencoder.RNNTrainer2.java

/**
 * Trains a two-layer GravesLSTM network with a mean-squared-error RNN output
 * layer on sequences produced by a previously trained stacked autoencoder
 * ("stack.rnn"). Writes the model to "stack-timeseries.rnn" after each epoch.
 *
 * @param args unused
 * @throws Exception if model deserialization, training, or serialization fails
 */
public static void main(String[] args) throws Exception {

    MultiLayerNetwork stackedAutoencoder = ModelSerializer.restoreMultiLayerNetwork("stack.rnn");

    Random rng = new Random();

    RNNIterator iter = new RNNIterator(stackedAutoencoder, rng, 100, 100, System.out);

    int labels = iter.inputColumns();
    int lstmLayerSize = 200;
    int tbpttLength = 50; // truncated-BPTT segment length (forward and backward)

    // Set up network configuration. The original chain called .iterations(1)
    // twice; the redundant second call has been removed.
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1)
            .rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list()
            .layer(0,
                    new GravesLSTM.Builder().nIn(labels).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(1,
                    new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(2,
                    new RnnOutputLayer.Builder().nIn(lstmLayerSize).nOut(labels).lossFunction(LossFunction.MSE)
                            .updater(Updater.RMSPROP).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new UniformDistribution(-0.08, 0.08)).build())
            .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength).build();

    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    model.setListeners(new ScoreIterationListener(System.out));

    // Train, evaluate, and checkpoint the model once per epoch.
    for (int epoch = 0; epoch < 300; epoch++) {
        model.fit(iter);
        iter.reset();
        evaluateModel(model, stackedAutoencoder, rng, epoch);
        ModelSerializer.writeModel(model, "stack-timeseries.rnn", true);
    }
}

From source file:org.ensor.fftmusings.rnn.RNNFactory.java

/**
 * Returns a network restored from {@code modelFilename} when that file exists;
 * otherwise builds, initializes, and serializes a fresh two-layer GravesLSTM
 * character model. Either way the returned network has a score listener
 * attached.
 *
 * @param modelFilename file to restore from or serialize the new model to
 * @param iter          supplies the input column and outcome counts
 * @throws IOException if reading or writing the model file fails
 */
public static MultiLayerNetwork create(File modelFilename, CharacterIterator iter) throws IOException {

    // Reuse an existing model when one has already been saved to disk.
    if (modelFilename.exists()) {
        MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(modelFilename);
        restored.clear();
        restored.setListeners(new ScoreIterationListener(System.out));
        return restored;
    }

    int nOut = iter.totalOutcomes();

    // All layers share RMSProp and a uniform [-0.08, 0.08] weight distribution.
    GravesLSTM firstLstm = new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
            .updater(Updater.RMSPROP).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
            .dist(new UniformDistribution(-0.08, 0.08)).build();
    GravesLSTM secondLstm = new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)
            .updater(Updater.RMSPROP).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
            .dist(new UniformDistribution(-0.08, 0.08)).build();
    // MCXENT + softmax: multi-class classification over the character set.
    RnnOutputLayer outputLayer = new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
            .activation(Activation.SOFTMAX).updater(Updater.RMSPROP).nIn(lstmLayerSize).nOut(nOut)
            .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.08, 0.08)).build();

    // NOTE(review): TruncatedBPTT is enabled without explicit tBPTT lengths,
    // so the library defaults apply — confirm that is intended.
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1)
            .rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list()
            .layer(0, firstLstm).layer(1, secondLstm).layer(2, outputLayer)
            .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT).build();

    MultiLayerNetwork freshNet = new MultiLayerNetwork(conf);
    freshNet.init();
    freshNet.setListeners(new ScoreIterationListener(System.out));

    ModelSerializer.writeModel(freshNet, modelFilename, true);

    return freshNet;
}

From source file:org.ensor.fftmusings.rnn2.GravesLSTMCharModellingExample.java

/**
 * Trains a two-layer GravesLSTM character model on Shakespeare text using
 * truncated BPTT, periodically sampling and printing generated text.
 *
 * @param args unused
 * @throws Exception if data loading or training fails
 */
public static void main(String[] args) throws Exception {
    int lstmLayerSize = 200; //Number of units in each GravesLSTM layer
    int miniBatchSize = 32; //Size of mini batch to use when  training
    int exampleLength = 1000; //Length of each training example sequence to use. This could certainly be increased
    int tbpttLength = 50; //Length for truncated backpropagation through time. i.e., do parameter updates every 50 characters
    int numEpochs = 30; //Total number of training epochs
    int generateSamplesEveryNMinibatches = 10; //How frequently to generate samples from the network? 1000 characters / 50 tbptt length: 20 parameter updates per minibatch
    int nSamplesToGenerate = 4; //Number of samples to generate after each training epoch
    int nCharactersToSample = 300; //Length of each sample to generate
    String generationInitialization = null; //Optional character initialization; a random character is used if null
    // Above is Used to 'prime' the LSTM with a character sequence to continue/complete.
    // Initialization characters must all be in CharacterIterator.getMinimalCharacterSet() by default
    Random rng = new Random(12345);

    //Get a DataSetIterator that handles vectorization of text into something we can use to train
    // our GravesLSTM network.
    CharacterIterator iter = getShakespeareIterator(miniBatchSize, exampleLength);
    int nOut = iter.totalOutcomes();

    //Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1)
            .rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).weightInit(WeightInit.XAVIER)
            .updater(Updater.RMSPROP).list()
            .layer(0,
                    new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
                            .activation(Activation.TANH).build())
            .layer(1,
                    new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH)
                            .build())
            .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
                    .nIn(lstmLayerSize).nOut(nOut).build())
            .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength)
            .tBPTTBackwardLength(tbpttLength).pretrain(false).backprop(true).build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.setListeners(new ScoreIterationListener(System.out));

    //Print the number of parameters in the network (and for each layer)
    Layer[] layers = net.getLayers();
    int totalNumParams = 0;
    for (int i = 0; i < layers.length; i++) {
        int nParams = layers[i].numParams();
        System.out.println("Number of parameters in layer " + i + ": " + nParams);
        totalNumParams += nParams;
    }
    System.out.println("Total number of network parameters: " + totalNumParams);

    //Do training, and then generate and print samples from network
    int miniBatchNumber = 0;
    for (int i = 0; i < numEpochs; i++) {
        // Fixed: original printed "Epoch number<i>" with no separating space.
        System.out.println("Epoch number " + i);
        while (iter.hasNext()) {
            DataSet ds = iter.next();
            net.fit(ds);
            System.out.println("Batch number " + miniBatchNumber);
            if (++miniBatchNumber % generateSamplesEveryNMinibatches == 0) {
                System.out.println("--------------------");
                System.out.println("Completed " + miniBatchNumber + " minibatches of size " + miniBatchSize
                        + "x" + exampleLength + " characters");
                System.out.println("Sampling characters from network given initialization \""
                        + (generationInitialization == null ? "" : generationInitialization) + "\"");
                String[] samples = sampleCharactersFromNetwork(generationInitialization, net, iter, rng,
                        nCharactersToSample, nSamplesToGenerate);
                for (int j = 0; j < samples.length; j++) {
                    System.out.println("----- Sample " + j + " -----");
                    System.out.println(samples[j]);
                    System.out.println();
                }
            }
        }

        iter.reset(); //Reset iterator for another epoch
    }

    System.out.println("\n\nExample complete");
}

From source file:seqmodel.RNNModel.java

/**
 * Builds and initializes a single-GravesLSTM-layer RNN for binary sequence
 * classification (2 softmax outputs, RMSE_XENT loss), trained with SGD +
 * RMSProp and truncated BPTT of length TRUNCATED_BPP_LEN.
 *
 * @param iter currently unused; retained for interface compatibility
 * @return the initialized network with a score listener attached
 */
MultiLayerNetwork buildRNN(AMISentenceIterator iter) {

    // Recurrent layer. NOTE(review): "softmax" activation inside an LSTM is
    // unusual (tanh/softsign are typical) — presumably experimental; verify.
    GravesLSTM recurrentLayer = new GravesLSTM.Builder()
            .nIn(numInputDimensions)
            .nOut(NUM_DIMENSIONS_LSTM)
            .activation("softmax")
            .build();

    // Output layer: 2 units for binary classification.
    RnnOutputLayer classifierLayer = new RnnOutputLayer.Builder()
            .activation("softmax")
            .lossFunction(LossFunctions.LossFunction.RMSE_XENT)
            .nIn(NUM_DIMENSIONS_LSTM)
            .nOut(2)
            .build();

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(SEED)
            .iterations(10)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.RMSPROP)
            .weightInit(WeightInit.RELU)
            .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
            .gradientNormalizationThreshold(1.0)
            .learningRate(0.001)
            .list(2)
            .layer(0, recurrentLayer)
            .layer(1, classifierLayer)
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(TRUNCATED_BPP_LEN)
            .tBPTTBackwardLength(TRUNCATED_BPP_LEN)
            .pretrain(false)
            .backprop(true)
            .build();

    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();
    network.setListeners(new ScoreIterationListener(1));
    return network;
}