List of usage examples for org.deeplearning4j.nn.conf.Updater.RMSPROP
Updater RMSPROP
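In the examples below, RMSPROP is selected either globally, by calling updater(Updater.RMSPROP) on the NeuralNetConfiguration.Builder, or per layer, by calling updater(Updater.RMSPROP) on an individual layer builder, usually together with rmsDecay to set the decay rate of the squared-gradient moving average. As a minimal sketch of the global form, assuming the same pre-1.0, enum-based DL4J API and imports that the examples below use (layer sizes, learning rate, and decay value here are illustrative, not taken from any particular example):

    // Minimal configuration selecting RMSProp as the update rule for all layers.
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(Updater.RMSPROP) // RMSProp update rule
            .rmsDecay(0.95)           // decay rate for the running average of squared gradients
            .learningRate(0.01)
            .weightInit(WeightInit.XAVIER)
            .list()
            .layer(0, new OutputLayer.Builder(LossFunction.MCXENT)
                    .activation("softmax").nIn(10).nOut(3).build())
            .pretrain(false).backprop(true)
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();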
From source file: com.javafxpert.neuralnetviz.scenario.BasicRNNExample.java
License: Apache License

public static MultiLayerNetworkEnhanced buildNetwork(WebSocketSession webSocketSession) throws Exception {
    //public static void main(String[] args) {
    // create a dedicated list of possible chars in LEARNSTRING_CHARS_LIST
    LinkedHashSet<Character> LEARNSTRING_CHARS = new LinkedHashSet<Character>();
    for (char c : LEARNSTRING)
        LEARNSTRING_CHARS.add(c);
    LEARNSTRING_CHARS_LIST.addAll(LEARNSTRING_CHARS);
    System.out.println("LEARNSTRING_CHARS_LIST: " + LEARNSTRING_CHARS_LIST);

    // some common parameters
    NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder();
    builder.iterations(10);
    builder.learningRate(0.001);
    builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
    builder.seed(123);
    builder.biasInit(0);
    builder.miniBatch(false);
    builder.updater(Updater.RMSPROP);
    builder.weightInit(WeightInit.XAVIER);

    ListBuilder listBuilder = builder.list();

    // first difference: for RNNs we need to use GravesLSTM.Builder
    for (int i = 0; i < HIDDEN_LAYER_CONT; i++) {
        GravesLSTM.Builder hiddenLayerBuilder = new GravesLSTM.Builder();
        hiddenLayerBuilder.nIn(i == 0 ? LEARNSTRING_CHARS.size() : HIDDEN_LAYER_WIDTH);
        hiddenLayerBuilder.nOut(HIDDEN_LAYER_WIDTH);
        // activation function adopted from GravesLSTMCharModellingExample;
        // it seems to work well with RNNs
        hiddenLayerBuilder.activation("tanh");
        listBuilder.layer(i, hiddenLayerBuilder.build());
    }

    // we need to use RnnOutputLayer for our RNN
    RnnOutputLayer.Builder outputLayerBuilder = new RnnOutputLayer.Builder(LossFunction.MCXENT);
    // softmax normalizes the output neurons so that the sum of all outputs is 1;
    // this is required for our sampleFromDistribution function
    outputLayerBuilder.activation("softmax");
    outputLayerBuilder.nIn(HIDDEN_LAYER_WIDTH);
    outputLayerBuilder.nOut(LEARNSTRING_CHARS.size());
    listBuilder.layer(HIDDEN_LAYER_CONT, outputLayerBuilder.build());

    // finish builder
    listBuilder.pretrain(false);
    listBuilder.backprop(true);

    // create network
    MultiLayerConfiguration conf = listBuilder.build();
    String[] inputFeatureNames = {};
    String[] outputLabelNames = {};
    MultiLayerNetworkEnhanced net = new MultiLayerNetworkEnhanced(conf, inputFeatureNames, outputLabelNames);
    net.init();
    //net.setListeners(new ScoreIterationListener(1));
    net.setListeners(new ModelListener(100, webSocketSession));

    /*
     * CREATE OUR TRAINING DATA
     */
    // create input and output arrays: SAMPLE_INDEX, INPUT_NEURON, SEQUENCE_POSITION
    INDArray input = Nd4j.zeros(1, LEARNSTRING_CHARS_LIST.size(), LEARNSTRING.length);
    INDArray labels = Nd4j.zeros(1, LEARNSTRING_CHARS_LIST.size(), LEARNSTRING.length);
    // loop through our sample sentence
    int samplePos = 0;
    for (char currentChar : LEARNSTRING) {
        // small hack: when currentChar is the last char, take the first char as
        // nextChar - not really required
        char nextChar = LEARNSTRING[(samplePos + 1) % (LEARNSTRING.length)];
        // input neuron for the current char is 1 at "samplePos"
        input.putScalar(new int[] { 0, LEARNSTRING_CHARS_LIST.indexOf(currentChar), samplePos }, 1);
        // output neuron for the next char is 1 at "samplePos"
        labels.putScalar(new int[] { 0, LEARNSTRING_CHARS_LIST.indexOf(nextChar), samplePos }, 1);
        samplePos++;
    }
    DataSet trainingData = new DataSet(input, labels);

    // some epochs
    for (int epoch = 0; epoch < 100; epoch++) {
        System.out.println("Epoch " + epoch);

        // train on the data
        net.fit(trainingData);

        // clear the state left over from the last example
        net.rnnClearPreviousState();

        // put the first character into the RNN as initialization
        INDArray testInit = Nd4j.zeros(LEARNSTRING_CHARS_LIST.size());
        testInit.putScalar(LEARNSTRING_CHARS_LIST.indexOf(LEARNSTRING[0]), 1);

        // run one step -> IMPORTANT: rnnTimeStep() must be called, not output();
        // the output shows what the net thinks should come next
        INDArray output = net.rnnTimeStep(testInit);

        // now the net should guess LEARNSTRING.length more characters
        for (int j = 0; j < LEARNSTRING.length; j++) {
            // first reduce the last output of the network to a concrete neuron;
            // the neuron with the highest output has the highest chance of being chosen
            double[] outputProbDistribution = new double[LEARNSTRING_CHARS.size()];
            for (int k = 0; k < outputProbDistribution.length; k++) {
                outputProbDistribution[k] = output.getDouble(k);
            }
            int sampledCharacterIdx = findIndexOfHighestValue(outputProbDistribution);

            // print the chosen output
            System.out.print(LEARNSTRING_CHARS_LIST.get(sampledCharacterIdx));

            // use the last output as the next input
            INDArray nextInput = Nd4j.zeros(LEARNSTRING_CHARS_LIST.size());
            nextInput.putScalar(sampledCharacterIdx, 1);
            output = net.rnnTimeStep(nextInput);
        }
        System.out.print("\n");
    }
    return net;
}
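A minimal hypothetical driver for the method above; the session variable is an assumed, already-open WebSocketSession (it is only used by the ModelListener for progress reporting):

    // Hypothetical usage; "session" is an assumption, not part of the original example.
    MultiLayerNetworkEnhanced net = BasicRNNExample.buildNetwork(session);
    net.rnnClearPreviousState(); // reset recurrent state before reusing the trained network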
From source file: com.javafxpert.neuralnetviz.scenario.PebbleGestures.java
License: Apache License

public static void main(String[] args) throws Exception {
    // First: get the dataset using the record reader. CSVRecordReader handles loading/parsing
    int numLinesToSkip = 0;
    String delimiter = ",";
    int miniBatchSize = 10;
    int numPossibleLabels = 3;
    int labelIndex = 3;
    boolean regression = false;
    final int numInputs = 3;
    int iterations = 600;
    long seed = 6;
    double learningRate = 0.003;
    int lstmLayerSize = 20; // Number of units in each GravesLSTM layer

    SequenceRecordReader reader = new CSVSequenceRecordReader(0, ",");
    reader.initialize(new NumberedFileInputSplit("src/main/resources/classification/pebble_data_%d.csv", 0, 2));
    DataSetIterator variableLengthIter = new SequenceRecordReaderDataSetIterator(reader, miniBatchSize,
            numPossibleLabels, labelIndex, regression);
    //org.datavec.api.records.reader.RecordReader recordReader = new org.datavec.api.records.reader.impl.csv.CSVRecordReader(numLinesToSkip, delimiter);
    //recordReader.initialize(new org.datavec.api.split.FileSplit(new File("src/main/resources/classification/speed_dating_all.csv")));

    DataSet allData = variableLengthIter.next();
    //SplitTestAndTrain testAndTrain = allData.splitTestAndTrain(0.99);
    //DataSet trainingData = testAndTrain.getTrain();
    //DataSet testData = testAndTrain.getTest();

    // We need to normalize our data. We'll use NormalizerStandardize (which gives us mean 0, unit variance):
    DataNormalization normalizer = new NormalizerStandardize();
    normalizer.fit(allData);          // Collect the statistics (mean/stdev) from the training data. This does not modify the input data
    normalizer.transform(allData);    // Apply normalization to the training data
    //normalizer.transform(testData); // Apply normalization to the test data, using statistics calculated from the *training* set

    // Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(iterations)
            .learningRate(learningRate).rmsDecay(0.95).seed(12345).regularization(true).l2(0.001)
            .weightInit(WeightInit.XAVIER).updater(Updater.RMSPROP).list()
            .layer(0, new GravesLSTM.Builder().nIn(numInputs).nOut(lstmLayerSize).activation("tanh").build())
            .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation("tanh").build())
            .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation("softmax") // MCXENT + softmax for classification
                    .nIn(lstmLayerSize).nOut(numPossibleLabels).build())
            //.backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength)
            .pretrain(false).backprop(true).build();

    String[] inputFeatureNames = { "accel x", "accel y", "accel z" };
    String[] outputLabelNames = { "Subject A", "Subject B", "Subject C" };
    MultiLayerNetworkEnhanced model = new MultiLayerNetworkEnhanced(conf, inputFeatureNames, outputLabelNames);
    model.init();
    model.setListeners(new ScoreIterationListener(1)); // Print score every parameter update
    //model.setListeners(new ModelListener(10, webSocketSession));
    model.setDataNormalization(normalizer);

    model.fit(allData);

    // evaluate the model on the test set
    /*
    Evaluation eval = new Evaluation(numPossibleLabels);
    INDArray output = model.output(testData.getFeatureMatrix());
    eval.eval(testData.getLabels(), output);
    System.out.println(eval.stats());
    */

    // Make prediction
    // Expected output: 0
    INDArray example = Nd4j.zeros(1, 3, 10);
    example.putScalar(new int[] { 0, 0, 0 }, 1895); example.putScalar(new int[] { 0, 1, 0 }, 2390); example.putScalar(new int[] { 0, 2, 0 }, 2024);
    example.putScalar(new int[] { 0, 0, 1 }, 1889); example.putScalar(new int[] { 0, 1, 1 }, 2389); example.putScalar(new int[] { 0, 2, 1 }, 2022);
    example.putScalar(new int[] { 0, 0, 2 }, 1886); example.putScalar(new int[] { 0, 1, 2 }, 2383); example.putScalar(new int[] { 0, 2, 2 }, 2027);
    example.putScalar(new int[] { 0, 0, 3 }, 1888); example.putScalar(new int[] { 0, 1, 3 }, 2382); example.putScalar(new int[] { 0, 2, 3 }, 2028);
    example.putScalar(new int[] { 0, 0, 4 }, 1889); example.putScalar(new int[] { 0, 1, 4 }, 2385); example.putScalar(new int[] { 0, 2, 4 }, 2027);
    example.putScalar(new int[] { 0, 0, 5 }, 1892); example.putScalar(new int[] { 0, 1, 5 }, 2386); example.putScalar(new int[] { 0, 2, 5 }, 2023);
    example.putScalar(new int[] { 0, 0, 6 }, 1893); example.putScalar(new int[] { 0, 1, 6 }, 2381); example.putScalar(new int[] { 0, 2, 6 }, 2020);
    example.putScalar(new int[] { 0, 0, 7 }, 1897); example.putScalar(new int[] { 0, 1, 7 }, 2388); example.putScalar(new int[] { 0, 2, 7 }, 2030);
    example.putScalar(new int[] { 0, 0, 8 }, 1893); example.putScalar(new int[] { 0, 1, 8 }, 2384); example.putScalar(new int[] { 0, 2, 8 }, 2027);
    example.putScalar(new int[] { 0, 0, 9 }, 1894); example.putScalar(new int[] { 0, 1, 9 }, 2387); example.putScalar(new int[] { 0, 2, 9 }, 2030);
    DataSet ds = new DataSet(example, null);
    normalizer.transform(ds);
    model.rnnClearPreviousState();
    INDArray outputActivations = model.output(example);
    System.out.println("outputActivations expected 0: " + outputActivations);

    // Make prediction
    // Expected output: 1
    example = Nd4j.zeros(1, 3, 10);
    example.putScalar(new int[] { 0, 0, 0 }, 2121); example.putScalar(new int[] { 0, 1, 0 }, 2349); example.putScalar(new int[] { 0, 2, 0 }, 1966);
    example.putScalar(new int[] { 0, 0, 1 }, 2124); example.putScalar(new int[] { 0, 1, 1 }, 2354); example.putScalar(new int[] { 0, 2, 1 }, 1966);
    example.putScalar(new int[] { 0, 0, 2 }, 2122); example.putScalar(new int[] { 0, 1, 2 }, 2357); example.putScalar(new int[] { 0, 2, 2 }, 1970);
    example.putScalar(new int[] { 0, 0, 3 }, 2122); example.putScalar(new int[] { 0, 1, 3 }, 2355); example.putScalar(new int[] { 0, 2, 3 }, 1966);
    example.putScalar(new int[] { 0, 0, 4 }, 2123); example.putScalar(new int[] { 0, 1, 4 }, 2347); example.putScalar(new int[] { 0, 2, 4 }, 1971);
    example.putScalar(new int[] { 0, 0, 5 }, 2123); example.putScalar(new int[] { 0, 1, 5 }, 2347); example.putScalar(new int[] { 0, 2, 5 }, 1967);
    example.putScalar(new int[] { 0, 0, 6 }, 2119); example.putScalar(new int[] { 0, 1, 6 }, 2354); example.putScalar(new int[] { 0, 2, 6 }, 1966);
    example.putScalar(new int[] { 0, 0, 7 }, 2114); example.putScalar(new int[] { 0, 1, 7 }, 2350); example.putScalar(new int[] { 0, 2, 7 }, 1963);
    example.putScalar(new int[] { 0, 0, 8 }, 2123); example.putScalar(new int[] { 0, 1, 8 }, 2351); example.putScalar(new int[] { 0, 2, 8 }, 1966);
    example.putScalar(new int[] { 0, 0, 9 }, 2126); example.putScalar(new int[] { 0, 1, 9 }, 2351); example.putScalar(new int[] { 0, 2, 9 }, 1963);
    ds = new DataSet(example, null);
    normalizer.transform(ds);
    model.rnnClearPreviousState();
    outputActivations = model.output(example);
    System.out.println("outputActivations expected 1: " + outputActivations);

    // Make prediction
    // Expected output: 2
    example = Nd4j.zeros(1, 3, 10);
    example.putScalar(new int[] { 0, 0, 0 }, 1925); example.putScalar(new int[] { 0, 1, 0 }, 2386); example.putScalar(new int[] { 0, 2, 0 }, 1983);
    example.putScalar(new int[] { 0, 0, 1 }, 1925); example.putScalar(new int[] { 0, 1, 1 }, 2389); example.putScalar(new int[] { 0, 2, 1 }, 1983);
    example.putScalar(new int[] { 0, 0, 2 }, 1923); example.putScalar(new int[] { 0, 1, 2 }, 2393); example.putScalar(new int[] { 0, 2, 2 }, 1985);
    example.putScalar(new int[] { 0, 0, 3 }, 1918); example.putScalar(new int[] { 0, 1, 3 }, 2386); example.putScalar(new int[] { 0, 2, 3 }, 1980);
    example.putScalar(new int[] { 0, 0, 4 }, 1922); example.putScalar(new int[] { 0, 1, 4 }, 2393); example.putScalar(new int[] { 0, 2, 4 }, 1978);
    example.putScalar(new int[] { 0, 0, 5 }, 1918); example.putScalar(new int[] { 0, 1, 5 }, 2383); example.putScalar(new int[] { 0, 2, 5 }, 1987);
    example.putScalar(new int[] { 0, 0, 6 }, 1927); example.putScalar(new int[] { 0, 1, 6 }, 2385); example.putScalar(new int[] { 0, 2, 6 }, 1984);
    example.putScalar(new int[] { 0, 0, 7 }, 1927); example.putScalar(new int[] { 0, 1, 7 }, 2384); example.putScalar(new int[] { 0, 2, 7 }, 1986);
    example.putScalar(new int[] { 0, 0, 8 }, 1922); example.putScalar(new int[] { 0, 1, 8 }, 2391); example.putScalar(new int[] { 0, 2, 8 }, 1985);
    example.putScalar(new int[] { 0, 0, 9 }, 1919); example.putScalar(new int[] { 0, 1, 9 }, 2389); example.putScalar(new int[] { 0, 2, 9 }, 1986);
    ds = new DataSet(example, null);
    normalizer.transform(ds);
    model.rnnClearPreviousState();
    outputActivations = model.output(example);
    System.out.println("outputActivations expected 2: " + outputActivations);

    // Make prediction
    // Expected output: 2
    example = Nd4j.zeros(1, 3, 10);
    example.putScalar(new int[] { 0, 0, 0 }, 1923); example.putScalar(new int[] { 0, 1, 0 }, 2335); example.putScalar(new int[] { 0, 2, 0 }, 1953);
    example.putScalar(new int[] { 0, 0, 1 }, 1945); example.putScalar(new int[] { 0, 1, 1 }, 2382); example.putScalar(new int[] { 0, 2, 1 }, 1939);
    example.putScalar(new int[] { 0, 0, 2 }, 1955); example.putScalar(new int[] { 0, 1, 2 }, 2397); example.putScalar(new int[] { 0, 2, 2 }, 1953);
    example.putScalar(new int[] { 0, 0, 3 }, 1929); example.putScalar(new int[] { 0, 1, 3 }, 2386); example.putScalar(new int[] { 0, 2, 3 }, 1919);
    example.putScalar(new int[] { 0, 0, 4 }, 1895); example.putScalar(new int[] { 0, 1, 4 }, 2379); example.putScalar(new int[] { 0, 2, 4 }, 1918);
    example.putScalar(new int[] { 0, 0, 5 }, 1887); example.putScalar(new int[] { 0, 1, 5 }, 2389); example.putScalar(new int[] { 0, 2, 5 }, 1927);
    example.putScalar(new int[] { 0, 0, 6 }, 1895); example.putScalar(new int[] { 0, 1, 6 }, 2392); example.putScalar(new int[] { 0, 2, 6 }, 1929);
    example.putScalar(new int[] { 0, 0, 7 }, 1898); example.putScalar(new int[] { 0, 1, 7 }, 2402); example.putScalar(new int[] { 0, 2, 7 }, 1914);
    example.putScalar(new int[] { 0, 0, 8 }, 1882); example.putScalar(new int[] { 0, 1, 8 }, 2395); example.putScalar(new int[] { 0, 2, 8 }, 1894);
    example.putScalar(new int[] { 0, 0, 9 }, 1867); example.putScalar(new int[] { 0, 1, 9 }, 2403); example.putScalar(new int[] { 0, 2, 9 }, 1855);
    ds = new DataSet(example, null);
    normalizer.transform(ds);
    model.rnnClearPreviousState();
    outputActivations = model.output(example);
    System.out.println("outputActivations expected 2 again: " + outputActivations);

    System.out.println("****************Example finished********************");
    //return model;
}
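The four prediction blocks above differ only in their accelerometer readings; a hypothetical helper such as the following (the putSample name and signature are assumptions, not part of the original source) could compress each block to ten calls:

    // Hypothetical helper: writes one [x, y, z] accelerometer sample at time step t
    // into a (1, 3, seriesLength) feature array, mirroring the putScalar calls above.
    static void putSample(INDArray example, int t, double x, double y, double z) {
        example.putScalar(new int[] { 0, 0, t }, x);
        example.putScalar(new int[] { 0, 1, t }, y);
        example.putScalar(new int[] { 0, 2, t }, z);
    }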
From source file: com.sliit.neuralnetwork.RecurrentNN.java

public void buildModel() {
    System.out.println("Build model....");
    iterations = outputNum + 1;
    NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder();
    builder.iterations(iterations);
    builder.learningRate(0.001);
    // builder.momentum(0.01);
    builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
    builder.seed(seed);
    builder.biasInit(1);
    builder.regularization(true).l2(1e-5);
    builder.updater(Updater.RMSPROP);
    builder.weightInit(WeightInit.XAVIER);

    NeuralNetConfiguration.ListBuilder list = builder.list();
    for (int i = 0; i < HIDDEN_LAYER_COUNT; i++) {
        GravesLSTM.Builder hiddenLayerBuilder = new GravesLSTM.Builder();
        hiddenLayerBuilder.nIn(i == 0 ? inputs : numHiddenNodes);
        hiddenLayerBuilder.nOut(numHiddenNodes);
        hiddenLayerBuilder.activation("tanh");
        list.layer(i, hiddenLayerBuilder.build());
    }

    RnnOutputLayer.Builder outputLayer = new RnnOutputLayer.Builder(LossFunction.MCXENT);
    outputLayer.activation("softmax");
    outputLayer.nIn(numHiddenNodes);
    outputLayer.nOut(outputNum);
    list.layer(HIDDEN_LAYER_COUNT, outputLayer.build());
    list.pretrain(false);
    list.backprop(true);

    MultiLayerConfiguration configuration = list.build();
    model = new MultiLayerNetwork(configuration);
    model.init();
    //model.setListeners(new ScoreIterationListener(1));
}
From source file: org.ensor.fftmusings.autoencoder.RNNTrainer.java

public static void main(String[] args) throws Exception {
    MultiLayerNetwork stackedAutoencoder = ModelSerializer.restoreMultiLayerNetwork("stack.rnn");
    Random rng = new Random();
    RNNIterator iter = new RNNIterator(stackedAutoencoder, rng, 100, 100, System.out);
    double learningRate = 0.0001;
    if (args.length != 0) {
        learningRate = Double.parseDouble(args[0]);
    }
    int nGaussians = 8;
    int labelWidth = iter.totalOutcomes();
    int inputWidth = iter.inputColumns();
    int lstmLayerSize = 400;
    int bttLength = 50;

    LossMixtureDensity costFunction = LossMixtureDensity.builder().gaussians(nGaussians).labelWidth(inputWidth)
            .build();

    // Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .learningRate(learningRate).rmsDecay(0.95).seed(12345).regularization(true).l2(0.001)
            .weightInit(WeightInit.XAVIER).list()
            .layer(0, new GravesLSTM.Builder().nIn(inputWidth).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                    .activation(Activation.TANH).build())
            .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                    .activation(Activation.TANH).build())
            //.layer(2, new RnnOutputLayer.Builder()
            //        .nIn(lstmLayerSize)
            //        .nOut((labelWidth + 2) * nGaussians)
            //        .activation(Activation.IDENTITY)
            //        //.lossFunction(LossFunctions.LossFunction.MSE)
            //        .lossFunction(LossMixtureDensity.builder()
            //                .gaussians(nGaussians)
            //                .labelWidth(inputWidth)
            //                .build())
            //        .updater(Updater.RMSPROP)
            //        .weightInit(WeightInit.DISTRIBUTION)
            //        .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(2, new MixtureDensityRNNOutputLayer.Builder().gaussians(nGaussians).nIn(lstmLayerSize)
                    .nOut(labelWidth).updater(Updater.RMSPROP).build())
            .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(bttLength).tBPTTBackwardLength(bttLength).build();

    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    model.setListeners(new ScoreIterationListener(System.out));

    for (int epoch = 0; epoch < 300; epoch++) {
        model.fit(iter);
        iter.reset();
        evaluateModel(model, costFunction, stackedAutoencoder, rng, epoch);
        ModelSerializer.writeModel(model, "stack-timeseries.rnn", true);
    }
}
From source file: org.ensor.fftmusings.autoencoder.RNNTrainer2.java

public static void main(String[] args) throws Exception {
    MultiLayerNetwork stackedAutoencoder = ModelSerializer.restoreMultiLayerNetwork("stack.rnn");
    Random rng = new Random();
    RNNIterator iter = new RNNIterator(stackedAutoencoder, rng, 100, 100, System.out);
    int labels = iter.inputColumns();
    int lstmLayerSize = 200;
    int bttLength = 50;

    // Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .learningRate(0.1).rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list()
            .layer(0, new GravesLSTM.Builder().nIn(labels).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(2, new RnnOutputLayer.Builder().nIn(lstmLayerSize).nOut(labels).lossFunction(LossFunction.MSE)
                    .updater(Updater.RMSPROP).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new UniformDistribution(-0.08, 0.08)).build())
            .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(bttLength).tBPTTBackwardLength(bttLength).build();

    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    model.setListeners(new ScoreIterationListener(System.out));

    for (int epoch = 0; epoch < 300; epoch++) {
        model.fit(iter);
        iter.reset();
        evaluateModel(model, stackedAutoencoder, rng, epoch);
        ModelSerializer.writeModel(model, "stack-timeseries.rnn", true);
    }
}
From source file: org.ensor.fftmusings.rnn.qft.SampleLSTM.java

public static MultiLayerNetwork create(File modelFilename, DataSetIterator iter) throws IOException {
    if (modelFilename.exists()) {
        return load(modelFilename);
    }
    int nOut = iter.totalOutcomes();

    // Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .learningRate(0.01).rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list()
            .layer(0, new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
                    .updater(Updater.RMSPROP).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) // MCXENT + softmax for classification
                    .updater(Updater.RMSPROP).nIn(lstmLayerSize).nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.08, 0.08))
                    .build())
            .pretrain(false).backprop(true).build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.setListeners(new ScoreIterationListener());
    ModelSerializer.writeModel(net, modelFilename, true);
    return net;
}
From source file: org.ensor.fftmusings.rnn.RNNFactory.java

public static MultiLayerNetwork create(File modelFilename, CharacterIterator iter) throws IOException {
    if (modelFilename.exists()) {
        MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(modelFilename);
        net.clear();
        net.setListeners(new ScoreIterationListener(System.out));
        return net;
    }
    int nOut = iter.totalOutcomes();

    // Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .learningRate(0.1).rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list()
            .layer(0, new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
                    .updater(Updater.RMSPROP).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) // MCXENT + softmax for classification
                    .updater(Updater.RMSPROP).nIn(lstmLayerSize).nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.08, 0.08))
                    .build())
            .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT).build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.setListeners(new ScoreIterationListener(System.out));
    ModelSerializer.writeModel(net, modelFilename, true);
    return net;
}
From source file: org.ensor.fftmusings.rnn2.GravesLSTMCharModellingExample.java

public static void main(String[] args) throws Exception {
    int lstmLayerSize = 200;                   // Number of units in each GravesLSTM layer
    int miniBatchSize = 32;                    // Size of mini batch to use when training
    int exampleLength = 1000;                  // Length of each training example sequence to use. This could certainly be increased
    int tbpttLength = 50;                      // Length for truncated backpropagation through time, i.e. do parameter updates every 50 characters
    int numEpochs = 30;                        // Total number of training epochs
    int generateSamplesEveryNMinibatches = 10; // How frequently to generate samples from the network? 1000 characters / 50 tbptt length: 20 parameter updates per minibatch
    int nSamplesToGenerate = 4;                // Number of samples to generate after each training epoch
    int nCharactersToSample = 300;             // Length of each sample to generate
    String generationInitialization = null;    // Optional character initialization; a random character is used if null.
    // The initialization is used to 'prime' the LSTM with a character sequence to continue/complete.
    // Initialization characters must all be in CharacterIterator.getMinimalCharacterSet() by default
    Random rng = new Random(12345);

    // Get a DataSetIterator that handles vectorization of text into something we can use to train
    // our GravesLSTM network.
    CharacterIterator iter = getShakespeareIterator(miniBatchSize, exampleLength);
    int nOut = iter.totalOutcomes();

    // Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1)
            .rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).weightInit(WeightInit.XAVIER)
            .updater(Updater.RMSPROP).list()
            .layer(0, new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
                    .activation(Activation.TANH).build())
            .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH)
                    .build())
            .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) // MCXENT + softmax for classification
                    .nIn(lstmLayerSize).nOut(nOut).build())
            .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength)
            .tBPTTBackwardLength(tbpttLength).pretrain(false).backprop(true).build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.setListeners(new ScoreIterationListener(System.out));

    // Print the number of parameters in the network (and for each layer)
    Layer[] layers = net.getLayers();
    int totalNumParams = 0;
    for (int i = 0; i < layers.length; i++) {
        int nParams = layers[i].numParams();
        System.out.println("Number of parameters in layer " + i + ": " + nParams);
        totalNumParams += nParams;
    }
    System.out.println("Total number of network parameters: " + totalNumParams);

    // Do training, and then generate and print samples from the network
    int miniBatchNumber = 0;
    for (int i = 0; i < numEpochs; i++) {
        System.out.println("Epoch number " + i);
        while (iter.hasNext()) {
            DataSet ds = iter.next();
            net.fit(ds);
            System.out.println("Batch number " + miniBatchNumber);
            if (++miniBatchNumber % generateSamplesEveryNMinibatches == 0) {
                System.out.println("--------------------");
                System.out.println("Completed " + miniBatchNumber + " minibatches of size " + miniBatchSize
                        + "x" + exampleLength + " characters");
                System.out.println("Sampling characters from network given initialization \""
                        + (generationInitialization == null ? "" : generationInitialization) + "\"");
                String[] samples = sampleCharactersFromNetwork(generationInitialization, net, iter, rng,
                        nCharactersToSample, nSamplesToGenerate);
                for (int j = 0; j < samples.length; j++) {
                    System.out.println("----- Sample " + j + " -----");
                    System.out.println(samples[j]);
                    System.out.println();
                }
            }
        }
        iter.reset(); // Reset iterator for another epoch
    }
    System.out.println("\n\nExample complete");
}
From source file: org.wso2.carbon.ml.rest.api.neuralNetworks.FeedForwardNetwork.java
License: Open Source License

/**
 * Method to map the user-selected updater algorithm name to an Updater object.
 *
 * @param updater the updater algorithm name
 * @return the corresponding Updater object, or null if the name is not recognized
 */
Updater mapUpdater(String updater) {
    Updater updaterAlgo = null;
    switch (updater) {
    case "sgd":
        updaterAlgo = Updater.SGD;
        break;
    case "adam":
        updaterAlgo = Updater.ADAM;
        break;
    case "adadelta":
        updaterAlgo = Updater.ADADELTA;
        break;
    case "nesterovs":
        updaterAlgo = Updater.NESTEROVS;
        break;
    case "adagrad":
        updaterAlgo = Updater.ADAGRAD;
        break;
    case "rmsprop":
        updaterAlgo = Updater.RMSPROP;
        break;
    case "none":
        updaterAlgo = Updater.NONE;
        break;
    case "custom":
        updaterAlgo = Updater.CUSTOM;
        break;
    default:
        updaterAlgo = null;
        break;
    }
    return updaterAlgo;
}
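A hypothetical call site for the mapper above (the method and the "rmsprop" key are from the source; applying the result to a NeuralNetConfiguration.Builder named builder is an assumption):

    Updater updaterAlgo = mapUpdater("rmsprop"); // returns Updater.RMSPROP
    builder.updater(updaterAlgo);                // hypothetical: apply the chosen updater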
From source file: seqmodel.RNNModel.java

MultiLayerNetwork buildRNN(AMISentenceIterator iter) {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(SEED).iterations(10)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            //.optimizationAlgo(OptimizationAlgorithm.LBFGS)
            //.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
            .updater(Updater.RMSPROP)
            //.updater(Updater.ADAGRAD)
            //.updater(Updater.SGD)
            //.regularization(true).l2(0.0001)
            //.regularization(true).l1(0.001)
            .weightInit(WeightInit.RELU)
            //.weightInit(WeightInit.UNIFORM)
            //.weightInit(WeightInit.XAVIER)
            //.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0)
            .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
            .gradientNormalizationThreshold(1.0).learningRate(0.001).list(2)
            .layer(0, new GravesLSTM.Builder().nIn(numInputDimensions).nOut(NUM_DIMENSIONS_LSTM)
                    //.activation("softsign").build())
                    .activation("softmax")
                    //.activation("tanh")
                    .build())
            /*
            .layer(1,
                    new GravesLSTM.Builder()
                            .nIn(25)
                            .nOut(NUM_DIMENSIONS_LSTM)
                            //.activation("softsign").build())
                            .activation("softmax")
                            //.activation("tanh")
                            .build())
            */
            .layer(1, new RnnOutputLayer.Builder().activation("softmax")
                    //.activation("tanh")
                    //.activation("sigmoid")
                    //.lossFunction(LossFunctions.LossFunction.MCXENT)
                    .lossFunction(LossFunctions.LossFunction.RMSE_XENT).nIn(NUM_DIMENSIONS_LSTM).nOut(2) // 2 for binary classification
                    .build())
            .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(TRUNCATED_BPP_LEN)
            .tBPTTBackwardLength(TRUNCATED_BPP_LEN)
            //.backpropType(BackpropType.Standard)
            .pretrain(false).backprop(true).build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.setListeners(new ScoreIterationListener(1));
    return net;
}