Example usage for org.deeplearning4j.nn.conf Updater RMSPROP


Introduction

This page collects example usages of org.deeplearning4j.nn.conf Updater.RMSPROP, the enum constant that selects the RMSProp gradient updater in Deeplearning4j.

Prototype

Updater RMSPROP

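Before the full examples, the following minimal sketch shows the typical way Updater.RMSPROP is wired into a network configuration with the DL4J 0.x builder API. This snippet is not taken from any of the source files below; the layer sizes, learning rate, and rmsDecay value are illustrative assumptions.

// Minimal sketch (illustrative values, not from the sources below):
// RMSPROP applies the RMSProp update rule to every layer; rmsDecay
// controls the decay rate of its moving average of squared gradients.
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .seed(12345)
        .learningRate(0.01)
        .updater(Updater.RMSPROP)
        .rmsDecay(0.95) // consulted only when the updater is RMSPROP
        .weightInit(WeightInit.XAVIER)
        .list()
        .layer(0, new DenseLayer.Builder().nIn(784).nOut(100).activation("relu").build())
        .layer(1, new OutputLayer.Builder(LossFunction.MCXENT)
                .activation("softmax").nIn(100).nOut(10).build())
        .pretrain(false).backprop(true)
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();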

Usage

From source file:com.javafxpert.neuralnetviz.scenario.BasicRNNExample.java

License:Apache License

public static MultiLayerNetworkEnhanced buildNetwork(WebSocketSession webSocketSession) throws Exception {
    //public static void main(String[] args) {

    // create a dedicated list of possible chars in LEARNSTRING_CHARS_LIST
    LinkedHashSet<Character> LEARNSTRING_CHARS = new LinkedHashSet<Character>();
    for (char c : LEARNSTRING)
        LEARNSTRING_CHARS.add(c);
    LEARNSTRING_CHARS_LIST.addAll(LEARNSTRING_CHARS);

    System.out.println("LEARNSTRING_CHARS_LIST: " + LEARNSTRING_CHARS_LIST);

    // some common parameters
    NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder();
    builder.iterations(10);
    builder.learningRate(0.001);
    builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
    builder.seed(123);
    builder.biasInit(0);
    builder.miniBatch(false);
    builder.updater(Updater.RMSPROP);
    builder.weightInit(WeightInit.XAVIER);

    ListBuilder listBuilder = builder.list();

    // first difference: for RNNs we need to use GravesLSTM.Builder
    for (int i = 0; i < HIDDEN_LAYER_CONT; i++) {
        GravesLSTM.Builder hiddenLayerBuilder = new GravesLSTM.Builder();
        hiddenLayerBuilder.nIn(i == 0 ? LEARNSTRING_CHARS.size() : HIDDEN_LAYER_WIDTH);
        hiddenLayerBuilder.nOut(HIDDEN_LAYER_WIDTH);
        // adopted activation function from GravesLSTMCharModellingExample
        // seems to work well with RNNs
        hiddenLayerBuilder.activation("tanh");
        listBuilder.layer(i, hiddenLayerBuilder.build());
    }

    // we need to use RnnOutputLayer for our RNN
    RnnOutputLayer.Builder outputLayerBuilder = new RnnOutputLayer.Builder(LossFunction.MCXENT);
    // softmax normalizes the output neurons so that the sum of all outputs is 1
    // this is required for our sampleFromDistribution-function
    outputLayerBuilder.activation("softmax");
    outputLayerBuilder.nIn(HIDDEN_LAYER_WIDTH);
    outputLayerBuilder.nOut(LEARNSTRING_CHARS.size());
    listBuilder.layer(HIDDEN_LAYER_CONT, outputLayerBuilder.build());

    // finish builder
    listBuilder.pretrain(false);
    listBuilder.backprop(true);

    // create network
    MultiLayerConfiguration conf = listBuilder.build();

    String[] inputFeatureNames = {};
    String[] outputLabelNames = {};
    MultiLayerNetworkEnhanced net = new MultiLayerNetworkEnhanced(conf, inputFeatureNames, outputLabelNames);
    net.init();
    //net.setListeners(new ScoreIterationListener(1));
    net.setListeners(new ModelListener(100, webSocketSession));

    /*
     * CREATE OUR TRAINING DATA
     */
    // create input and output arrays: SAMPLE_INDEX, INPUT_NEURON,
    // SEQUENCE_POSITION
    INDArray input = Nd4j.zeros(1, LEARNSTRING_CHARS_LIST.size(), LEARNSTRING.length);
    INDArray labels = Nd4j.zeros(1, LEARNSTRING_CHARS_LIST.size(), LEARNSTRING.length);
    // loop through our sample-sentence
    int samplePos = 0;
    for (char currentChar : LEARNSTRING) {
        // small hack: when currentChar is the last, take the first char as
        // nextChar - not really required
        char nextChar = LEARNSTRING[(samplePos + 1) % (LEARNSTRING.length)];
        // input neuron for current-char is 1 at "samplePos"
        input.putScalar(new int[] { 0, LEARNSTRING_CHARS_LIST.indexOf(currentChar), samplePos }, 1);
        // output neuron for next-char is 1 at "samplePos"
        labels.putScalar(new int[] { 0, LEARNSTRING_CHARS_LIST.indexOf(nextChar), samplePos }, 1);
        samplePos++;
    }
    DataSet trainingData = new DataSet(input, labels);

    // some epochs
    for (int epoch = 0; epoch < 100; epoch++) {

        System.out.println("Epoch " + epoch);

        // train on the data
        net.fit(trainingData);

        // clear the RNN's previous state left over from the last example
        net.rnnClearPreviousState();

        // put the first character into the RNN as an initialization
        INDArray testInit = Nd4j.zeros(LEARNSTRING_CHARS_LIST.size());
        testInit.putScalar(LEARNSTRING_CHARS_LIST.indexOf(LEARNSTRING[0]), 1);

        // run one step -> IMPORTANT: rnnTimeStep() must be called, not
        // output(); the output shows what the net thinks should come next
        INDArray output = net.rnnTimeStep(testInit);

        // now the net should guess LEARNSTRING.length more characters
        for (int j = 0; j < LEARNSTRING.length; j++) {

            // first process the last output of the network to a concrete
            // neuron: the neuron with the highest output is chosen
            // (a greedy argmax rather than true sampling)
            double[] outputProbDistribution = new double[LEARNSTRING_CHARS.size()];
            for (int k = 0; k < outputProbDistribution.length; k++) {
                outputProbDistribution[k] = output.getDouble(k);
            }
            int sampledCharacterIdx = findIndexOfHighestValue(outputProbDistribution);

            // print the chosen output
            System.out.print(LEARNSTRING_CHARS_LIST.get(sampledCharacterIdx));

            // use the last output as input
            INDArray nextInput = Nd4j.zeros(LEARNSTRING_CHARS_LIST.size());
            nextInput.putScalar(sampledCharacterIdx, 1);
            output = net.rnnTimeStep(nextInput);

        }
        System.out.print("\n");

    }
    return net;
}
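The helper findIndexOfHighestValue called above is not included in this excerpt; a minimal sketch consistent with how it is used (a plain argmax over the output activations) might look like this:

// Hypothetical sketch of the helper used above (not part of the excerpt):
// returns the index of the largest value in the array, i.e. a greedy argmax.
private static int findIndexOfHighestValue(double[] distribution) {
    int maxIndex = 0;
    for (int i = 1; i < distribution.length; i++) {
        if (distribution[i] > distribution[maxIndex]) {
            maxIndex = i;
        }
    }
    return maxIndex;
}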

From source file:com.javafxpert.neuralnetviz.scenario.PebbleGestures.java

License:Apache License

public static void main(String[] args) throws Exception {

    //First: get the dataset using the record reader. CSVRecordReader handles loading/parsing
    int numLinesToSkip = 0;
    String delimiter = ",";
    int miniBatchSize = 10;
    int numPossibleLabels = 3;
    int labelIndex = 3;
    boolean regression = false;
    final int numInputs = 3;
    int iterations = 600;
    long seed = 6;
    double learningRate = 0.003;
    int lstmLayerSize = 20; //Number of units in each GravesLSTM layer

    SequenceRecordReader reader = new CSVSequenceRecordReader(0, ",");
    reader.initialize(new NumberedFileInputSplit("src/main/resources/classification/pebble_data_%d.csv", 0, 2));
    DataSetIterator variableLengthIter = new SequenceRecordReaderDataSetIterator(reader, miniBatchSize,
            numPossibleLabels, labelIndex, regression);

    //org.datavec.api.records.reader.RecordReader recordReader = new org.datavec.api.records.reader.impl.csv.CSVRecordReader(numLinesToSkip,delimiter);
    //recordReader.initialize(new org.datavec.api.split.FileSplit(new File("src/main/resources/classification/speed_dating_all.csv")));

    DataSet allData = variableLengthIter.next();
    //SplitTestAndTrain testAndTrain = allData.splitTestAndTrain(0.99);  //Use 65% of data for training

    //DataSet trainingData = testAndTrain.getTrain();
    //DataSet testData = testAndTrain.getTest();

    //We need to normalize our data. We'll use NormalizerStandardize (which gives us mean 0, unit variance):
    DataNormalization normalizer = new NormalizerStandardize();
    normalizer.fit(allData); //Collect the statistics (mean/stdev) from the training data. This does not modify the input data
    normalizer.transform(allData); //Apply normalization to the training data
    //normalizer.transform(testData);         //Apply normalization to the test data. This is using statistics calculated from the *training* set

    //Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(iterations)
            .learningRate(learningRate).rmsDecay(0.95).seed(12345).regularization(true).l2(0.001)
            .weightInit(WeightInit.XAVIER).updater(Updater.RMSPROP).list()
            .layer(0, new GravesLSTM.Builder().nIn(numInputs).nOut(lstmLayerSize).activation("tanh").build())
            .layer(1,
                    new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation("tanh").build())
            .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation("softmax") //MCXENT + softmax for classification
                    .nIn(lstmLayerSize).nOut(numPossibleLabels).build())
            //.backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength)
            .pretrain(false).backprop(true).build();

    String[] inputFeatureNames = { "accel x", "accel y", "accel z" };
    String[] outputLabelNames = { "Subject A", "Subject B", "Subject C" };
    MultiLayerNetworkEnhanced model = new MultiLayerNetworkEnhanced(conf, inputFeatureNames, outputLabelNames);
    model.init();
    model.setListeners(new ScoreIterationListener(1)); //Print score every parameter update
    //model.setListeners(new ModelListener(10, webSocketSession));
    model.setDataNormalization(normalizer);

    model.fit(allData);

    //evaluate the model on the test set
    /*
    Evaluation eval = new Evaluation(numPossibleLabels);
    INDArray output = model.output(testData.getFeatureMatrix());
    eval.eval(testData.getLabels(), output);
    System.out.println(eval.stats());
    */

    // Make prediction
    // Expected output: 0
    // Raw accelerometer readings {x, y, z} for ten consecutive time steps
    double[][] readings = { { 1895, 2390, 2024 }, { 1889, 2389, 2022 }, { 1886, 2383, 2027 },
            { 1888, 2382, 2028 }, { 1889, 2385, 2027 }, { 1892, 2386, 2023 }, { 1893, 2381, 2020 },
            { 1897, 2388, 2030 }, { 1893, 2384, 2027 }, { 1894, 2387, 2030 } };
    INDArray example = Nd4j.zeros(1, 3, 10);
    for (int t = 0; t < readings.length; t++) {
        for (int f = 0; f < 3; f++) {
            example.putScalar(new int[] { 0, f, t }, readings[t][f]);
        }
    }
    DataSet ds = new DataSet(example, null);
    normalizer.transform(ds); // normalizes the features (the "example" array) in place
    model.rnnClearPreviousState();
    INDArray outputActivations = model.output(example);
    System.out.println("outputActivations expected 0: " + outputActivations);

    // Make prediction
    // Expected output: 1
    readings = new double[][] { { 2121, 2349, 1966 }, { 2124, 2354, 1966 }, { 2122, 2357, 1970 },
            { 2122, 2355, 1966 }, { 2123, 2347, 1971 }, { 2123, 2347, 1967 }, { 2119, 2354, 1966 },
            { 2114, 2350, 1963 }, { 2123, 2351, 1966 }, { 2126, 2351, 1963 } };
    example = Nd4j.zeros(1, 3, 10);
    for (int t = 0; t < readings.length; t++) {
        for (int f = 0; f < 3; f++) {
            example.putScalar(new int[] { 0, f, t }, readings[t][f]);
        }
    }
    ds = new DataSet(example, null);
    normalizer.transform(ds);
    model.rnnClearPreviousState();
    outputActivations = model.output(example);
    System.out.println("outputActivations expected 1: " + outputActivations);

    // Make prediction
    // Expected output: 2
    readings = new double[][] { { 1925, 2386, 1983 }, { 1925, 2389, 1983 }, { 1923, 2393, 1985 },
            { 1918, 2386, 1980 }, { 1922, 2393, 1978 }, { 1918, 2383, 1987 }, { 1927, 2385, 1984 },
            { 1927, 2384, 1986 }, { 1922, 2391, 1985 }, { 1919, 2389, 1986 } };
    example = Nd4j.zeros(1, 3, 10);
    for (int t = 0; t < readings.length; t++) {
        for (int f = 0; f < 3; f++) {
            example.putScalar(new int[] { 0, f, t }, readings[t][f]);
        }
    }
    ds = new DataSet(example, null);
    normalizer.transform(ds);
    model.rnnClearPreviousState();
    outputActivations = model.output(example);
    System.out.println("outputActivations expected 2: " + outputActivations);

    // Make prediction
    // Expected output: 2
    readings = new double[][] { { 1923, 2335, 1953 }, { 1945, 2382, 1939 }, { 1955, 2397, 1953 },
            { 1929, 2386, 1919 }, { 1895, 2379, 1918 }, { 1887, 2389, 1927 }, { 1895, 2392, 1929 },
            { 1898, 2402, 1914 }, { 1882, 2395, 1894 }, { 1867, 2403, 1855 } };
    example = Nd4j.zeros(1, 3, 10);
    for (int t = 0; t < readings.length; t++) {
        for (int f = 0; f < 3; f++) {
            example.putScalar(new int[] { 0, f, t }, readings[t][f]);
        }
    }
    ds = new DataSet(example, null);
    normalizer.transform(ds);
    model.rnnClearPreviousState();
    outputActivations = model.output(example);
    System.out.println("outputActivations expected 2 again: " + outputActivations);

    System.out.println("****************Example finished********************");

    //return model;
}

From source file:com.sliit.neuralnetwork.RecurrentNN.java

public void buildModel() {

    System.out.println("Build model....");
    iterations = outputNum + 1;
    NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder();
    builder.iterations(iterations);
    builder.learningRate(0.001);
    // builder.momentum(0.01);
    builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
    builder.seed(seed);
    builder.biasInit(1);
    builder.regularization(true).l2(1e-5);
    builder.updater(Updater.RMSPROP);
    builder.weightInit(WeightInit.XAVIER);

    NeuralNetConfiguration.ListBuilder list = builder.list();

    for (int i = 0; i < HIDDEN_LAYER_COUNT; i++) {

        GravesLSTM.Builder hiddenLayerBuilder = new GravesLSTM.Builder();
        hiddenLayerBuilder.nIn(i == 0 ? inputs : numHiddenNodes);
        hiddenLayerBuilder.nOut(numHiddenNodes);
        hiddenLayerBuilder.activation("tanh");
        list.layer(i, hiddenLayerBuilder.build());
    }

    RnnOutputLayer.Builder outputLayer = new RnnOutputLayer.Builder(LossFunction.MCXENT);
    outputLayer.activation("softmax");
    outputLayer.nIn(numHiddenNodes);
    outputLayer.nOut(outputNum);
    list.layer(HIDDEN_LAYER_COUNT, outputLayer.build());
    list.pretrain(false);
    list.backprop(true);
    MultiLayerConfiguration configuration = list.build();
    model = new MultiLayerNetwork(configuration);
    model.init();
    //model.setListeners(new ScoreIterationListener(1));

}

From source file:org.ensor.fftmusings.autoencoder.RNNTrainer.java

public static void main(String[] args) throws Exception {

    MultiLayerNetwork stackedAutoencoder = ModelSerializer.restoreMultiLayerNetwork("stack.rnn");

    Random rng = new Random();

    RNNIterator iter = new RNNIterator(stackedAutoencoder, rng, 100, 100, System.out);

    double learningRate = 0.0001;
    if (args.length != 0) {
        learningRate = Double.parseDouble(args[0]);
    }

    int nGaussians = 8;
    int labelWidth = iter.totalOutcomes();
    int inputWidth = iter.inputColumns();
    int lstmLayerSize = 400;
    int bttLength = 50;

    LossMixtureDensity costFunction = LossMixtureDensity.builder().gaussians(nGaussians).labelWidth(inputWidth)
            .build();

    //Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .learningRate(learningRate).rmsDecay(0.95).seed(12345).regularization(true).l2(0.001)
            .weightInit(WeightInit.XAVIER).list()
            .layer(0,
                    new GravesLSTM.Builder().nIn(inputWidth).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                            .activation(Activation.TANH).build())
            .layer(1,
                    new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                            .activation(Activation.TANH).build())
            //                .layer(2, new RnnOutputLayer.Builder()
            //                        .nIn(lstmLayerSize)
            //                        .nOut((labelWidth + 2) * nGaussians)
            //                        .activation(Activation.IDENTITY)
            //                        //.lossFunction(LossFunctions.LossFunction.MSE)
            //                        .lossFunction(LossMixtureDensity.builder()
            //                            .gaussians(nGaussians)
            //                            .labelWidth(inputWidth)
            //                            .build())
            //                        .updater(Updater.RMSPROP)
            //                        .weightInit(WeightInit.DISTRIBUTION)
            //                        .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(2,
                    new MixtureDensityRNNOutputLayer.Builder().gaussians(nGaussians).nIn(lstmLayerSize)
                            .nOut(labelWidth).updater(Updater.RMSPROP).build())
            .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(bttLength).tBPTTBackwardLength(bttLength).build();

    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    model.setListeners(new ScoreIterationListener(System.out));

    for (int epoch = 0; epoch < 300; epoch++) {
        model.fit(iter);
        iter.reset();
        evaluateModel(model, costFunction, stackedAutoencoder, rng, epoch);
        ModelSerializer.writeModel(model, "stack-timeseries.rnn", true);
    }
}

From source file:org.ensor.fftmusings.autoencoder.RNNTrainer2.java

public static void main(String[] args) throws Exception {

    MultiLayerNetwork stackedAutoencoder = ModelSerializer.restoreMultiLayerNetwork("stack.rnn");

    Random rng = new Random();

    RNNIterator iter = new RNNIterator(stackedAutoencoder, rng, 100, 100, System.out);

    int labels = iter.inputColumns();
    int lstmLayerSize = 200;
    int bttLength = 50;

    //Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1)
            .rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list()
            .layer(0,
                    new GravesLSTM.Builder().nIn(labels).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(1,
                    new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(2,
                    new RnnOutputLayer.Builder().nIn(lstmLayerSize).nOut(labels).lossFunction(LossFunction.MSE)
                            .updater(Updater.RMSPROP).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new UniformDistribution(-0.08, 0.08)).build())
            .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(bttLength).tBPTTBackwardLength(bttLength).build();

    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    model.setListeners(new ScoreIterationListener(System.out));

    for (int epoch = 0; epoch < 300; epoch++) {
        model.fit(iter);
        iter.reset();
        evaluateModel(model, stackedAutoencoder, rng, epoch);
        ModelSerializer.writeModel(model, "stack-timeseries.rnn", true);
    }
}

From source file:org.ensor.fftmusings.rnn.qft.SampleLSTM.java

public static MultiLayerNetwork create(File modelFilename, DataSetIterator iter) throws IOException {

    if (modelFilename.exists()) {
        return load(modelFilename);
    }

    int nOut = iter.totalOutcomes();

    //Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .learningRate(0.01).rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list()
            .layer(0, new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
                    .updater(Updater.RMSPROP).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(1,
                    new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(2,
                    new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
                            .updater(Updater.RMSPROP).nIn(lstmLayerSize).nOut(nOut)
                            .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.08, 0.08))
                            .build())
            .pretrain(false).backprop(true).build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.setListeners(new ScoreIterationListener());

    ModelSerializer.writeModel(net, modelFilename, true);

    return net;
}

From source file:org.ensor.fftmusings.rnn.RNNFactory.java

public static MultiLayerNetwork create(File modelFilename, CharacterIterator iter) throws IOException {

    if (modelFilename.exists()) {
        MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(modelFilename);
        net.clear();
        net.setListeners(new ScoreIterationListener(System.out));
        return net;
    }

    int nOut = iter.totalOutcomes();

    //Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1)
            .rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list()
            .layer(0, new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
                    .updater(Updater.RMSPROP).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(1,
                    new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(2,
                    new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
                            .updater(Updater.RMSPROP).nIn(lstmLayerSize).nOut(nOut)
                            .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.08, 0.08))
                            .build())
            .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT).build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.setListeners(new ScoreIterationListener(System.out));

    ModelSerializer.writeModel(net, modelFilename, true);

    return net;
}

From source file:org.ensor.fftmusings.rnn2.GravesLSTMCharModellingExample.java

public static void main(String[] args) throws Exception {
    int lstmLayerSize = 200; //Number of units in each GravesLSTM layer
    int miniBatchSize = 32; //Size of mini batch to use when training
    int exampleLength = 1000; //Length of each training example sequence to use. This could certainly be increased
    int tbpttLength = 50; //Length for truncated backpropagation through time. i.e., do parameter updates every 50 characters
    int numEpochs = 30; //Total number of training epochs
    int generateSamplesEveryNMinibatches = 10; //How frequently to generate samples from the network. 1000 characters / 50 tbptt length: 20 parameter updates per minibatch
    int nSamplesToGenerate = 4; //Number of samples to generate after each training epoch
    int nCharactersToSample = 300; //Length of each sample to generate
    String generationInitialization = null; //Optional character initialization; a random character is used if null
    // The above is used to 'prime' the LSTM with a character sequence to continue/complete.
    // Initialization characters must all be in CharacterIterator.getMinimalCharacterSet() by default
    Random rng = new Random(12345);

    //Get a DataSetIterator that handles vectorization of text into something we can use to train
    // our GravesLSTM network.
    CharacterIterator iter = getShakespeareIterator(miniBatchSize, exampleLength);
    int nOut = iter.totalOutcomes();

    //Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1)
            .rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).weightInit(WeightInit.XAVIER)
            .updater(Updater.RMSPROP).list()
            .layer(0,
                    new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
                            .activation(Activation.TANH).build())
            .layer(1,
                    new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH)
                            .build())
            .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
                    .nIn(lstmLayerSize).nOut(nOut).build())
            .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength)
            .tBPTTBackwardLength(tbpttLength).pretrain(false).backprop(true).build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.setListeners(new ScoreIterationListener(System.out));

    //Print the number of parameters in the network (and for each layer)
    Layer[] layers = net.getLayers();
    int totalNumParams = 0;
    for (int i = 0; i < layers.length; i++) {
        int nParams = layers[i].numParams();
        System.out.println("Number of parameters in layer " + i + ": " + nParams);
        totalNumParams += nParams;
    }
    System.out.println("Total number of network parameters: " + totalNumParams);

    //Do training, and then generate and print samples from network
    int miniBatchNumber = 0;
    for (int i = 0; i < numEpochs; i++) {
        System.out.println("Epoch number" + i);
        while (iter.hasNext()) {
            DataSet ds = iter.next();
            net.fit(ds);
            System.out.println("Batch number " + miniBatchNumber);
            if (++miniBatchNumber % generateSamplesEveryNMinibatches == 0) {
                System.out.println("--------------------");
                System.out.println("Completed " + miniBatchNumber + " minibatches of size " + miniBatchSize
                        + "x" + exampleLength + " characters");
                System.out.println("Sampling characters from network given initialization \""
                        + (generationInitialization == null ? "" : generationInitialization) + "\"");
                String[] samples = sampleCharactersFromNetwork(generationInitialization, net, iter, rng,
                        nCharactersToSample, nSamplesToGenerate);
                for (int j = 0; j < samples.length; j++) {
                    System.out.println("----- Sample " + j + " -----");
                    System.out.println(samples[j]);
                    System.out.println();
                }
            }
        }

        iter.reset(); //Reset iterator for another epoch
    }

    System.out.println("\n\nExample complete");
}

From source file:org.wso2.carbon.ml.rest.api.neuralNetworks.FeedForwardNetwork.java

License:Open Source License

/**
 * Maps the user-selected updater algorithm name to an Updater object.
 * @param updater the updater name selected by the user
 * @return the corresponding Updater object, or null if the name is unknown
 */
Updater mapUpdater(String updater) {

    Updater updaterAlgo = null;

    switch (updater) {

    case "sgd":
        updaterAlgo = Updater.SGD;
        break;

    case "adam":
        updaterAlgo = Updater.ADAM;
        break;

    case "adadelta":
        updaterAlgo = Updater.ADADELTA;
        break;

    case "nesterovs":
        updaterAlgo = Updater.NESTEROVS;
        break;

    case "adagrad":
        updaterAlgo = Updater.ADAGRAD;
        break;

    case "rmsprop":
        updaterAlgo = Updater.RMSPROP;
        break;

    case "none":
        updaterAlgo = Updater.NONE;
        break;

    case "custom":
        updaterAlgo = Updater.CUSTOM;
        break;

    default:
        updaterAlgo = null;
        break;
    }
    return updaterAlgo;
}
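A hypothetical caller (not part of the WSO2 source above) might use mapUpdater along these lines. Note that because every Updater constant matches its lowercase name, Updater.valueOf(updater.toUpperCase()) would behave equivalently for recognized names, though it throws IllegalArgumentException for unknown input instead of returning null:

// Hypothetical usage sketch: resolve the user's choice, then apply it.
Updater updaterAlgo = mapUpdater("rmsprop"); // yields Updater.RMSPROP
if (updaterAlgo == null) {
    throw new IllegalArgumentException("Unsupported updater algorithm");
}
NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
        .updater(updaterAlgo)
        .rmsDecay(0.95); // consulted only when the updater is RMSPROP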

From source file:seqmodel.RNNModel.java

MultiLayerNetwork buildRNN(AMISentenceIterator iter) {

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(SEED).iterations(10)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            //.optimizationAlgo(OptimizationAlgorithm.LBFGS)
            //.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
            .updater(Updater.RMSPROP)
            //.updater(Updater.ADAGRAD)
            //.updater(Updater.SGD)
            //.regularization(true).l2(0.0001)
            //.regularization(true).l1(0.001)
            .weightInit(WeightInit.RELU)
            //.weightInit(WeightInit.UNIFORM)
            //.weightInit(WeightInit.XAVIER)
            //.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0)
            .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
            .gradientNormalizationThreshold(1.0).learningRate(0.001).list(2)
            .layer(0, new GravesLSTM.Builder().nIn(numInputDimensions).nOut(NUM_DIMENSIONS_LSTM)
                    //.activation("softsign").build())
                    .activation("softmax")
                    //.activation("tanh")
                    .build())
            /*        
            .layer(1,
                    new GravesLSTM.Builder()
                    .nIn(25)
                    .nOut(NUM_DIMENSIONS_LSTM)
                    //.activation("softsign").build())
                    .activation("softmax")
                    //.activation("tanh")
                    .build())
            */
            .layer(1, new RnnOutputLayer.Builder().activation("softmax")
                    //.activation("tanh")
                    //.activation("sigmoid")
                    //.lossFunction(LossFunctions.LossFunction.MCXENT)
                    .lossFunction(LossFunctions.LossFunction.RMSE_XENT).nIn(NUM_DIMENSIONS_LSTM).nOut(2) // 2 for binary classification
                    .build())
            .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(TRUNCATED_BPP_LEN)
            .tBPTTBackwardLength(TRUNCATED_BPP_LEN)
            //.backpropType(BackpropType.Standard)
            .pretrain(false).backprop(true).build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.setListeners(new ScoreIterationListener(1));
    return net;
}