List of usage examples for org.deeplearning4j.nn.conf BackpropType TruncatedBPTT
BackpropType TruncatedBPTT
To view the source code for org.deeplearning4j.nn.conf BackpropType TruncatedBPTT, click the Source link.
From source file:org.ensor.fftmusings.autoencoder.RNNTrainer.java
/**
 * Trains a mixture-density RNN on top of a previously trained stacked
 * autoencoder loaded from "stack.rnn", checkpointing the model to
 * "stack-timeseries.rnn" after every epoch.
 *
 * @param args optional single argument: the learning rate (default 0.0001)
 * @throws Exception if the autoencoder cannot be restored or the model
 *                   cannot be written
 */
public static void main(String[] args) throws Exception {
    MultiLayerNetwork stackedAutoencoder = ModelSerializer.restoreMultiLayerNetwork("stack.rnn");
    Random rng = new Random();
    RNNIterator iter = new RNNIterator(stackedAutoencoder, rng, 100, 100, System.out);

    double learningRate = 0.0001;
    if (args.length != 0) {
        learningRate = Double.parseDouble(args[0]);
    }

    int nGaussians = 8;
    int labelWidth = iter.totalOutcomes();
    int inputWidth = iter.inputColumns();
    int lstmLayerSize = 400;
    int bttLength = 50;

    // Used only by evaluateModel() below; the output layer builds its own
    // equivalent loss internally.
    LossMixtureDensity costFunction = LossMixtureDensity.builder()
            .gaussians(nGaussians)
            .labelWidth(inputWidth)
            .build();

    // Set up network configuration.
    // NOTE(review): the original chained .iterations(1) twice; the redundant
    // duplicate call has been removed (same effective configuration).
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1)
            .learningRate(learningRate)
            .rmsDecay(0.95)
            .seed(12345)
            .regularization(true).l2(0.001)
            .weightInit(WeightInit.XAVIER)
            .list()
            .layer(0, new GravesLSTM.Builder().nIn(inputWidth).nOut(lstmLayerSize)
                    .updater(Updater.RMSPROP).activation(Activation.TANH).build())
            .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)
                    .updater(Updater.RMSPROP).activation(Activation.TANH).build())
            .layer(2, new MixtureDensityRNNOutputLayer.Builder()
                    .gaussians(nGaussians).nIn(lstmLayerSize).nOut(labelWidth)
                    .updater(Updater.RMSPROP).build())
            .pretrain(false).backprop(true)
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(bttLength).tBPTTBackwardLength(bttLength)
            .build();

    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    model.setListeners(new ScoreIterationListener(System.out));

    for (int epoch = 0; epoch < 300; epoch++) {
        model.fit(iter);
        iter.reset();
        evaluateModel(model, costFunction, stackedAutoencoder, rng, epoch);
        // Checkpoint after every epoch so progress survives interruption.
        ModelSerializer.writeModel(model, "stack-timeseries.rnn", true);
    }
}
From source file:org.ensor.fftmusings.autoencoder.RNNTrainer2.java
public static void main(String[] args) throws Exception { MultiLayerNetwork stackedAutoencoder = ModelSerializer.restoreMultiLayerNetwork("stack.rnn"); Random rng = new Random(); RNNIterator iter = new RNNIterator(stackedAutoencoder, rng, 100, 100, System.out); int labels = iter.inputColumns(); int lstmLayerSize = 200; int bttLength = 50; //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1) .rmsDecay(0.95).seed(12345).iterations(1).regularization(true).l2(0.001).list() .layer(0,/* ww w . j a v a 2s. c o m*/ new GravesLSTM.Builder().nIn(labels).nOut(lstmLayerSize).updater(Updater.RMSPROP) .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP) .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .layer(2, new RnnOutputLayer.Builder().nIn(lstmLayerSize).nOut(labels).lossFunction(LossFunction.MSE) .updater(Updater.RMSPROP).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT) .tBPTTForwardLength(bttLength).tBPTTBackwardLength(bttLength).build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); model.setListeners(new ScoreIterationListener(System.out)); for (int epoch = 0; epoch < 300; epoch++) { model.fit(iter); iter.reset(); evaluateModel(model, stackedAutoencoder, rng, epoch); ModelSerializer.writeModel(model, "stack-timeseries.rnn", true); } }
From source file:org.ensor.fftmusings.rnn.RNNFactory.java
public static MultiLayerNetwork create(File modelFilename, CharacterIterator iter) throws IOException { if (modelFilename.exists()) { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(modelFilename); net.clear();//from www . jav a 2 s .c o m net.setListeners(new ScoreIterationListener(System.out)); return net; } int nOut = iter.totalOutcomes(); //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1) .rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list() .layer(0, new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize) .updater(Updater.RMSPROP).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP) .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .updater(Updater.RMSPROP).nIn(lstmLayerSize).nOut(nOut) .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.08, 0.08)) .build()) .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); net.setListeners(new ScoreIterationListener(System.out)); ModelSerializer.writeModel(net, modelFilename, true); return net; }
From source file:org.ensor.fftmusings.rnn2.GravesLSTMCharModellingExample.java
public static void main(String[] args) throws Exception { int lstmLayerSize = 200; //Number of units in each GravesLSTM layer int miniBatchSize = 32; //Size of mini batch to use when training int exampleLength = 1000; //Length of each training example sequence to use. This could certainly be increased int tbpttLength = 50; //Length for truncated backpropagation through time. i.e., do parameter updates ever 50 characters int numEpochs = 30; //Total number of training epochs int generateSamplesEveryNMinibatches = 10; //How frequently to generate samples from the network? 1000 characters / 50 tbptt length: 20 parameter updates per minibatch int nSamplesToGenerate = 4; //Number of samples to generate after each training epoch int nCharactersToSample = 300; //Length of each sample to generate String generationInitialization = null; //Optional character initialization; a random character is used if null // Above is Used to 'prime' the LSTM with a character sequence to continue/complete. // Initialization characters must all be in CharacterIterator.getMinimalCharacterSet() by default Random rng = new Random(12345); //Get a DataSetIterator that handles vectorization of text into something we can use to train // our GravesLSTM network. CharacterIterator iter = getShakespeareIterator(miniBatchSize, exampleLength); int nOut = iter.totalOutcomes(); //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1) .rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).weightInit(WeightInit.XAVIER) .updater(Updater.RMSPROP).list() .layer(0,//from w w w . 
j a v a2s .c o m new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize) .activation(Activation.TANH).build()) .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH) .build()) .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nIn(lstmLayerSize).nOut(nOut).build()) .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength) .tBPTTBackwardLength(tbpttLength).pretrain(false).backprop(true).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); net.setListeners(new ScoreIterationListener(System.out)); //Print the number of parameters in the network (and for each layer) Layer[] layers = net.getLayers(); int totalNumParams = 0; for (int i = 0; i < layers.length; i++) { int nParams = layers[i].numParams(); System.out.println("Number of parameters in layer " + i + ": " + nParams); totalNumParams += nParams; } System.out.println("Total number of network parameters: " + totalNumParams); //Do training, and then generate and print samples from network int miniBatchNumber = 0; for (int i = 0; i < numEpochs; i++) { System.out.println("Epoch number" + i); while (iter.hasNext()) { DataSet ds = iter.next(); net.fit(ds); System.out.println("Batch number " + miniBatchNumber); if (++miniBatchNumber % generateSamplesEveryNMinibatches == 0) { System.out.println("--------------------"); System.out.println("Completed " + miniBatchNumber + " minibatches of size " + miniBatchSize + "x" + exampleLength + " characters"); System.out.println("Sampling characters from network given initialization \"" + (generationInitialization == null ? 
"" : generationInitialization) + "\""); String[] samples = sampleCharactersFromNetwork(generationInitialization, net, iter, rng, nCharactersToSample, nSamplesToGenerate); for (int j = 0; j < samples.length; j++) { System.out.println("----- Sample " + j + " -----"); System.out.println(samples[j]); System.out.println(); } } } iter.reset(); //Reset iterator for another epoch } System.out.println("\n\nExample complete"); }
From source file:seqmodel.RNNModel.java
/**
 * Builds and initializes a two-layer recurrent network (GravesLSTM followed
 * by an RnnOutputLayer) trained with truncated BPTT, intended for binary
 * sequence classification.
 *
 * @param iter sentence iterator; not referenced in this method — the input
 *             width comes from the numInputDimensions field. TODO confirm
 *             whether the parameter is needed at all.
 * @return an initialized MultiLayerNetwork with a score listener attached
 */
MultiLayerNetwork buildRNN(AMISentenceIterator iter) {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(SEED)
            .iterations(10)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.RMSPROP)
            .weightInit(WeightInit.RELU)
            .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
            .gradientNormalizationThreshold(1.0)
            .learningRate(0.001)
            .list(2)
            .layer(0, new GravesLSTM.Builder()
                    .nIn(numInputDimensions)
                    .nOut(NUM_DIMENSIONS_LSTM)
                    .activation("softmax")
                    .build())
            .layer(1, new RnnOutputLayer.Builder()
                    .activation("softmax")
                    .lossFunction(LossFunctions.LossFunction.RMSE_XENT)
                    .nIn(NUM_DIMENSIONS_LSTM)
                    .nOut(2) // 2 for binary classification
                    .build())
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(TRUNCATED_BPP_LEN)
            .tBPTTBackwardLength(TRUNCATED_BPP_LEN)
            .pretrain(false)
            .backprop(true)
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.setListeners(new ScoreIterationListener(1));
    return net;
}