List of usage examples for org.deeplearning4j.nn.multilayer MultiLayerNetwork fit
@Override public void fit(MultiDataSetIterator iterator)
From source file:cnn.image.classification.CNNImageClassification.java
public static void main(String[] args) { int nChannels = 3; int outputNum = 10; // int numExamples = 80; int batchSize = 10; int nEpochs = 20; int iterations = 1; int seed = 123; int height = 32; int width = 32; Random randNumGen = new Random(seed); System.out.println("Load data...."); File parentDir = new File("train1/"); FileSplit filesInDir = new FileSplit(parentDir, allowedExtensions, randNumGen); ParentPathLabelGenerator labelMaker = new ParentPathLabelGenerator(); BalancedPathFilter pathFilter = new BalancedPathFilter(randNumGen, allowedExtensions, labelMaker); //Split the image files into train and test. Specify the train test split as 80%,20% InputSplit[] filesInDirSplit = filesInDir.sample(pathFilter, 100, 0); InputSplit[] filesInDirSplitTest = filesInDir.sample(pathFilter, 0, 100); InputSplit trainData = filesInDirSplit[0]; InputSplit testData = filesInDirSplitTest[1]; System.out.println("train = " + trainData.length()); System.out.println("test = " + testData.length()); //Specifying a new record reader with the height and width you want the images to be resized to. //Note that the images in this example are all of different size //They will all be resized to the height and width specified below ImageRecordReader recordReader = new ImageRecordReader(height, width, nChannels, labelMaker); //Often there is a need to transforming images to artificially increase the size of the dataset recordReader.initialize(trainData);/*from w w w. j av a2s. 
c o m*/ DataSetIterator dataIterTrain = new RecordReaderDataSetIterator(recordReader, batchSize, 1, outputNum); // recordReader.reset(); recordReader.initialize(testData); DataSetIterator dataIterTest = new RecordReaderDataSetIterator(recordReader, batchSize, 1, outputNum); DataNormalization scaler = new ImagePreProcessingScaler(0, 1); dataIterTrain.setPreProcessor(scaler); dataIterTest.setPreProcessor(scaler); System.out.println("Build model...."); MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) .iterations(iterations).regularization(true).l2(0.0005) // .dropOut(0.5) .learningRate(0.001)//.biasLearningRate(0.02) //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) .weightInit(WeightInit.XAVIER).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(Updater.NESTEROVS).momentum(0.9).list() .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(nChannels).stride(1, 1).nOut(20) .activation("identity").build()) .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2).stride(2, 2) .build()) .layer(2, new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50).activation("identity").build()) .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).stride(2, 2).build()) .layer(4, new DenseLayer.Builder().activation("relu").nOut(500).build()) .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(outputNum) .activation("softmax").build()) .setInputType(InputType.convolutional(height, width, nChannels)) //See note below .backprop(true).pretrain(false); MultiLayerConfiguration b = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations) .regularization(false).l2(0.005) // tried 0.0001, 0.0005 .learningRate(0.0001) // tried 0.00001, 0.00005, 0.000001 .weightInit(WeightInit.XAVIER).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) 
.updater(Updater.NESTEROVS).momentum(0.9).list().layer(0, new ConvolutionLayer.Builder(5, 5) //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(50) // tried 10, 20, 40, 50 .activation("relu").build()) .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).stride(2, 2).build()) .layer(2, new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(100) // tried 25, 50, 100 .activation("relu").build()) .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).stride(2, 2).build()) .layer(4, new DenseLayer.Builder().activation("relu").nOut(500).build()) .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(outputNum) .activation("softmax").build()) .backprop(true).pretrain(false).cnnInputSize(height, width, nChannels).build(); MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); System.out.println("Train model...."); model.setListeners(new ScoreIterationListener(1)); // for( int i=0; i<nEpochs; i++ ) { // model.setListeners(new HistogramIterationListener(1)); MultipleEpochsIterator trainIter = new MultipleEpochsIterator(nEpochs, dataIterTrain, 2); model.fit(trainIter); // System.out.println("*** Completed epoch - " + i + " ***"); System.out.println("Evaluate model...."); // Evaluation eval = new Evaluation(outputNum); // while(dataIterTest.hasNext()){ // DataSet ds = dataIterTest.next(); // INDArray output = model.output(ds.getFeatureMatrix(), false); // eval.eval(ds.getLabels(), output); // } // System.out.println(eval.stats()); // dataIterTest.reset(); // } Evaluation eval1 = model.evaluate(dataIterTest); System.out.println(eval1.stats()); System.out.println("****************Example finished********************"); }
From source file:com.example.android.displayingbitmaps.ui.ImageGridActivity.java
License:Apache License
/**
 * Trains a two-layer MLP (784 -> 1000 -> 10) on a 10k-sample MNIST subset.
 * Each minibatch is split 80/20 into a training part and a held-out part; the
 * held-out parts are accumulated and evaluated after training.
 *
 * @throws Exception if the MNIST data cannot be loaded
 */
public void trainMLP() throws Exception {
    Nd4j.ENFORCE_NUMERICAL_STABILITY = true;
    final int numRows = 28;
    final int numColumns = 28;
    int outputNum = 10;
    int numSamples = 10000;
    int batchSize = 500;
    int iterations = 10;
    int seed = 123;
    int listenerFreq = iterations / 5;
    // 80% of each minibatch is used for training, the rest held out.
    int splitTrainNum = (int) (batchSize * .8);
    DataSet mnist;
    SplitTestAndTrain trainTest;
    DataSet trainInput;
    List<INDArray> testInput = new ArrayList<>();
    List<INDArray> testLabels = new ArrayList<>();

    log.info("Load data....");
    DataSetIterator mnistIter = new MnistDataSetIterator(batchSize, numSamples, true);

    log.info("Build model....");
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(iterations)
            .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).learningRate(1e-1f)
            .momentum(0.5).momentumAfter(Collections.singletonMap(3, 0.9)).useDropConnect(true).list(2)
            .layer(0, new DenseLayer.Builder().nIn(numRows * numColumns).nOut(1000).activation("relu")
                    .weightInit(WeightInit.XAVIER).build())
            .layer(1, new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD).nIn(1000).nOut(outputNum)
                    .activation("softmax").weightInit(WeightInit.XAVIER).build())
            .build();

    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    // FIX: the original registered the same ScoreIterationListener twice
    // (once before and once after the "Train model" log line); once is enough.
    model.setListeners(Arrays.asList((IterationListener) new ScoreIterationListener(listenerFreq)));

    log.info("Train model....");
    while (mnistIter.hasNext()) {
        mnist = mnistIter.next();
        trainTest = mnist.splitTestAndTrain(splitTrainNum, new Random(seed));
        trainInput = trainTest.getTrain();
        // Accumulate the held-out features/labels for post-training evaluation.
        testInput.add(trainTest.getTest().getFeatureMatrix());
        testLabels.add(trainTest.getTest().getLabels());
        model.fit(trainInput);
    }

    log.info("Evaluate model....");
    Evaluation eval = new Evaluation(outputNum);
    for (int i = 0; i < testInput.size(); i++) {
        INDArray output = model.output(testInput.get(i));
        eval.eval(testLabels.get(i), output);
    }
    log.info(eval.stats());
    log.info("****************Example finished********************");
}
From source file:com.heatonresearch.aifh.examples.ann.LearnXORBackprop.java
License:Apache License
/** * The main method.//from w ww. j a v a2s. c o m * @param args Not used. */ public static void main(String[] args) { int seed = 43; double learningRate = 0.4; int nEpochs = 100; int numInputs = XOR_INPUT[0].length; int numOutputs = XOR_IDEAL[0].length; int numHiddenNodes = 4; // Setup training data. INDArray xorInput = Nd4j.create(XOR_INPUT); INDArray xorIdeal = Nd4j.create(XOR_IDEAL); DataSet xorDataSet = new DataSet(xorInput, xorIdeal); // Create neural network. MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).iterations(1) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(learningRate) .updater(Updater.NESTEROVS).momentum(0.9).list(2) .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes).weightInit(WeightInit.XAVIER) .activation("relu").build()) .layer(1, new OutputLayer.Builder(LossFunction.MSE).weightInit(WeightInit.XAVIER) .activation("identity").nIn(numHiddenNodes).nOut(numOutputs).build()) .pretrain(false).backprop(true).build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); model.setListeners(new ScoreIterationListener(1)); // Train for (int n = 0; n < nEpochs; n++) { model.fit(xorDataSet); } // Evaluate System.out.println("Evaluating neural network."); for (int i = 0; i < 4; i++) { INDArray input = xorInput.getRow(i); INDArray output = model.output(input); System.out.println(input + " : " + output); } }
From source file:Dl4j.Doc2VecWithAutoEncoder.java
public static void main(String[] args) throws FileNotFoundException, IOException { if (args.length < 1) { args = new String[1]; args[0] = "/home/procheta/NetBeansProjects/Dl4jTest/src/dl4jtest/init.properties"; }//from w w w . j a v a 2s .com String[] docs = { "The cat sat on the mat", "The dog sat on the mat", "The chicken ate the corn", "The corn was sweet", "The milk was sweet", "The dog sat on the mat", "The cat drank the milk", "The dog ate the bone" }; try { Properties prop = new Properties(); prop.load(new FileReader(args[0])); LuceneDocFetcher luceneDocFetcher; // test loading a simple collection of docs... // Create in-memory index RAMDirectory ramdir = new RAMDirectory(); IndexWriterConfig iwcfg = new IndexWriterConfig(new EnglishAnalyzer()); iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter writer = new IndexWriter(ramdir, iwcfg); for (String doc : docs) { try { Document lDoc = new Document(); lDoc.add(new Field(LuceneDocFetcher.CONTENET_FIELD_NAME, doc, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); writer.addDocument(lDoc); } catch (Exception e) { } } writer.close(); Path path = Paths.get(prop.getProperty("index")); Directory dir = FSDirectory.open(path); Doc2VecWithAutoEncoder dva = new Doc2VecWithAutoEncoder(); System.out.println(prop.getProperty("depth")); ArrayList<String> docIds; dva.getDocIds(prop.getProperty("qid"), prop.getProperty("qrel")); // docIds = dva.subsample(Integer.parseInt(prop.getProperty("depth")), prop.getProperty("fileList"), prop.getProperty("qid"), prop.getProperty("folderPath")); // dva.saveSampleDocId(docIds, prop.getProperty("sampleOutput")); // pass the in-mem index reader to the vectorizer // luceneDocFetcher = new LuceneDocFetcher(dir, dva.docIds); luceneDocFetcher = new LuceneDocFetcher(dir, dva.docIds, dva.labels); DataSetIterator iter = new BaseDatasetIterator(1, 50, luceneDocFetcher); while (iter.hasNext()) { DataSet v = iter.next(); System.out.println(v.getFeatures()); } // test 
auto-encoding final int vocabSize = luceneDocFetcher.getDimension(); //int seed = Random.nextInt(vocabSize); int iterations = 2; int listenerFreq = iterations / 5; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() //.seed(seed) .iterations(iterations).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .list(2) .layer(0, new RBM.Builder().nIn(vocabSize).nOut(5) .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) .layer(1, new RBM.Builder().nIn(5).nOut(10).lossFunction(LossFunctions.LossFunction.RMSE_XENT) .build()) //.pretrain(true) //.backprop(true) //.layer(2, new RBM.Builder().nIn(500).nOut(250).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) //.layer(3, new RBM.Builder().nIn(250).nOut(100).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) //.layer(4, new RBM.Builder().nIn(100).nOut(30).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) /* //encoding stops .layer(5, new RBM.Builder().nIn(30).nOut(100).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) //decoding starts .layer(6, new RBM.Builder().nIn(100).nOut(250).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) .layer(7, new RBM.Builder().nIn(250).nOut(500).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) .layer(8, new RBM.Builder().nIn(500).nOut(1000).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) .layer(9, new OutputLayer.Builder(LossFunctions.LossFunction.RMSE_XENT).nIn(1000).nOut(vocabSize).build()) .pretrain(true).backprop(true) */ .build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); model.setListeners(Arrays.asList((IterationListener) new ScoreIterationListener(listenerFreq))); model.fit(iter); System.out.println("Output layer: "); iter.reset(); while (iter.hasNext()) { DataSet v = iter.next(); // System.out.println(model.output(v.getFeatures())); } //++Procheta iter.reset(); dva.saveModel(iter, prop.getProperty("output"), model);//*/ } catch (Exception ex) { 
ex.printStackTrace(); } }
From source file:org.audiveris.omrdataset.train.Training.java
License:Open Source License
/** * Perform the training of the neural network. * <p>//ww w. j av a 2s . co m * Before training is launched, if the network model exists on disk it is reloaded, otherwise a * brand new one is created. * * @throws Exception in case of IO problem or interruption */ public void process() throws Exception { Files.createDirectories(MISTAKES_PATH); int nChannels = 1; // Number of input channels int batchSize = 64; // Batch size int nEpochs = 1; //3; //10; //2; // Number of training epochs int iterations = 1; // 2; //10; // Number of training iterations int seed = 123; // // Pixel norms NormalizerStandardize normalizer = NormalizerSerializer.getDefault().restore(PIXELS_PATH.toFile()); // Get the dataset using the record reader. CSVRecordReader handles loading/parsing int labelIndex = CONTEXT_WIDTH * CONTEXT_HEIGHT; // format: all cells then label int numLinesToSkip = 1; // Because of header comment line String delimiter = ","; RecordReader trainRecordReader = new CSVRecordReader(numLinesToSkip, delimiter); trainRecordReader.initialize(new FileSplit(FEATURES_PATH.toFile())); logger.info("Getting dataset from {} ...", FEATURES_PATH); RecordReaderDataSetIterator trainIter = new RecordReaderDataSetIterator(trainRecordReader, batchSize, labelIndex, numClasses, -1); trainIter.setCollectMetaData(true); //Instruct the iterator to collect metadata, and store it in the DataSet objects RecordReader testRecordReader = new CSVRecordReader(numLinesToSkip, delimiter); testRecordReader.initialize(new FileSplit(FEATURES_PATH.toFile())); RecordReaderDataSetIterator testIter = new RecordReaderDataSetIterator(testRecordReader, batchSize, labelIndex, numClasses, -1); testIter.setCollectMetaData(true); //Instruct the iterator to collect metadata, and store it in the DataSet objects // Normalization DataSetPreProcessor preProcessor = new MyPreProcessor(normalizer); trainIter.setPreProcessor(preProcessor); testIter.setPreProcessor(preProcessor); if (false) { System.out.println("\n +++++ Test 
Set Examples MetaData +++++"); while (testIter.hasNext()) { DataSet ds = testIter.next(); List<RecordMetaData> testMetaData = ds.getExampleMetaData(RecordMetaData.class); for (RecordMetaData recordMetaData : testMetaData) { System.out.println(recordMetaData.getLocation()); } } testIter.reset(); } final MultiLayerNetwork model; if (Files.exists(MODEL_PATH)) { model = ModelSerializer.restoreMultiLayerNetwork(MODEL_PATH.toFile(), false); logger.info("Model restored from {}", MODEL_PATH.toAbsolutePath()); } else { logger.info("Building model from scratch"); MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() // .seed(seed) // .iterations(iterations) // .regularization(true) // .l2(0.0005) // .learningRate(.002) // HB: was .01 initially //.biasLearningRate(0.02) //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) .weightInit(WeightInit.XAVIER) // .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) // .updater(Updater.NESTEROVS).momentum(0.9) // .list() // .layer(0, new ConvolutionLayer.Builder(5, 5) // .name("C0") // .nIn(nChannels) // .stride(1, 1) // .nOut(20) // .activation(Activation.IDENTITY) // .build()) // .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) // .name("S1") // .kernelSize(2, 2) // .stride(2, 2) // .build()) // .layer(2, new ConvolutionLayer.Builder(5, 5) // .name("C2") // .stride(1, 1) // .nOut(50) // .activation(Activation.IDENTITY) // .build()) // .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) // .name("S3") // .kernelSize(2, 2) // .stride(2, 2) // .build()) // .layer(4, new DenseLayer.Builder() // .name("D4") // .nOut(500) // .activation(Activation.RELU) // .build()) // .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) // .name("O5") // .nOut(numClasses) // .activation(Activation.SOFTMAX) // .build()) // .setInputType(InputType.convolutionalFlat(CONTEXT_HEIGHT, CONTEXT_WIDTH, 
1)); MultiLayerConfiguration conf = builder.build(); model = new MultiLayerNetwork(conf); model.init(); } // Prepare monitoring UIServer uiServer = null; try { if (true) { //Initialize the user interface backend uiServer = UIServer.getInstance(); //Configure where the network information (gradients, score vs. time etc) is to be stored. Here: store in memory. StatsStorage statsStorage = new InMemoryStatsStorage(); //Alternative: new FileStatsStorage(File), for saving and loading later //Attach the StatsStorage instance to the UI: this allows the contents of the StatsStorage to be visualized uiServer.attach(statsStorage); //Then add the StatsListener to collect this information from the network, as it trains model.setListeners(new StatsListener(statsStorage), new ScoreIterationListener(10)); } else { model.setListeners(new ScoreIterationListener(10)); } logger.info("Training model..."); for (int epoch = 1; epoch <= nEpochs; epoch++) { Path epochFolder = Main.cli.mistakes ? MISTAKES_PATH.resolve("epoch#" + epoch) : null; long start = System.currentTimeMillis(); model.fit(trainIter); long stop = System.currentTimeMillis(); double dur = stop - start; logger.info(String.format("*** End epoch#%d, time: %.0f sec", epoch, dur / 1000)); // Save model ModelSerializer.writeModel(model, MODEL_PATH.toFile(), false); ModelSerializer.addNormalizerToModel(MODEL_PATH.toFile(), normalizer); logger.info("Model+normalizer stored as {}", MODEL_PATH.toAbsolutePath()); // // logger.info("Evaluating model..."); // // Evaluation eval = new Evaluation(OmrShapes.NAMES); // // while (testIter.hasNext()) { // DataSet ds = testIter.next(); // List<RecordMetaData> testMetaData = ds.getExampleMetaData(RecordMetaData.class); // INDArray output = model.output(ds.getFeatureMatrix(), false); // eval.eval(ds.getLabels(), output, testMetaData); // } // // System.out.println(eval.stats()); // testIter.reset(); // // //Get a list of prediction errors, from the Evaluation object // //Prediction errors like 
this are only available after calling iterator.setCollectMetaData(true) // List<Prediction> mistakes = eval.getPredictionErrors(); // logger.info("Epoch#{} Prediction Errors: {}", epoch, mistakes.size()); // // //We can also load a subset of the data, to a DataSet object: // //Here we load the raw data: // List<RecordMetaData> predictionErrorMetaData = new ArrayList<RecordMetaData>(); // // for (Prediction p : mistakes) { // predictionErrorMetaData.add(p.getRecordMetaData(RecordMetaData.class)); // } // // List<Record> predictionErrorRawData = testRecordReader.loadFromMetaData( // predictionErrorMetaData); // // for (int ie = 0; ie < mistakes.size(); ie++) { // Prediction p = mistakes.get(ie); // List<Writable> rawData = predictionErrorRawData.get(ie).getRecord(); // saveMistake(p, rawData, epochFolder); // } // // // // To avoid long useless sessions... // if (mistakes.isEmpty()) { // logger.info("No mistakes left, training stopped."); // // break; // } } } finally { // Stop monitoring if (uiServer != null) { uiServer.stop(); } } logger.info("****************Example finished********************"); }
From source file:org.ensor.fftmusings.autoencoder.DeepAutoencoder.java
public static void main(String[] args) throws Exception { double learningRate = 0.0001; if (args.length > 0) { learningRate = Double.parseDouble(args[0]); }//ww w. j a v a 2 s. c o m MultiLayerNetwork model = readAutoencoder("data/daa/deepmodel.daa", learningRate); PrintStream log = System.out; DataSetIterator iter = createIterator(); log.println("Train model...."); int epoch = 0; for (int i = 0; i < 100; i++) { model.fit(iter); iter.reset(); evaluateModel(model, epoch); ModelSerializer.writeModel(model, "data/daa/deepmodel.daa", true); epoch++; } }
From source file:org.ensor.fftmusings.autoencoder.RNNTrainer.java
/**
 * Trains a two-layer GravesLSTM network with a mixture-density output layer on
 * sequences produced by a pre-trained stacked auto-encoder, checkpointing the
 * model after every epoch.
 *
 * @param args optional: args[0] overrides the default learning rate
 * @throws Exception on model IO failure
 */
public static void main(String[] args) throws Exception {
    MultiLayerNetwork stackedAutoencoder = ModelSerializer.restoreMultiLayerNetwork("stack.rnn");
    Random rng = new Random();
    RNNIterator iter = new RNNIterator(stackedAutoencoder, rng, 100, 100, System.out);

    double learningRate = 0.0001;
    if (args.length != 0) {
        learningRate = Double.parseDouble(args[0]);
    }

    int nGaussians = 8;
    int labelWidth = iter.totalOutcomes();
    int inputWidth = iter.inputColumns();
    int lstmLayerSize = 400;
    int bttLength = 50; // truncated-BPTT window

    // NOTE(review): the loss is built with labelWidth(inputWidth) while the output
    // layer uses nOut(labelWidth) -- confirm the two widths are meant to agree.
    LossMixtureDensity costFunction = LossMixtureDensity.builder().gaussians(nGaussians)
            .labelWidth(inputWidth).build();

    // Set up network configuration.
    // FIX: the original chained .iterations(1) twice; the duplicate is removed.
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1)
            .learningRate(learningRate).rmsDecay(0.95).seed(12345)
            .regularization(true).l2(0.001)
            .weightInit(WeightInit.XAVIER).list()
            .layer(0, new GravesLSTM.Builder().nIn(inputWidth).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                    .activation(Activation.TANH).build())
            .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                    .activation(Activation.TANH).build())
            .layer(2, new MixtureDensityRNNOutputLayer.Builder().gaussians(nGaussians).nIn(lstmLayerSize)
                    .nOut(labelWidth).updater(Updater.RMSPROP).build())
            .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(bttLength).tBPTTBackwardLength(bttLength).build();

    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    model.setListeners(new ScoreIterationListener(System.out));

    // Train, evaluate, and checkpoint after every epoch.
    for (int epoch = 0; epoch < 300; epoch++) {
        model.fit(iter);
        iter.reset();
        evaluateModel(model, costFunction, stackedAutoencoder, rng, epoch);
        ModelSerializer.writeModel(model, "stack-timeseries.rnn", true);
    }
}
From source file:org.ensor.fftmusings.autoencoder.RNNTrainer2.java
public static void main(String[] args) throws Exception { MultiLayerNetwork stackedAutoencoder = ModelSerializer.restoreMultiLayerNetwork("stack.rnn"); Random rng = new Random(); RNNIterator iter = new RNNIterator(stackedAutoencoder, rng, 100, 100, System.out); int labels = iter.inputColumns(); int lstmLayerSize = 200; int bttLength = 50; //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1) .rmsDecay(0.95).seed(12345).iterations(1).regularization(true).l2(0.001).list() .layer(0,/* www .j a va 2 s .c om*/ new GravesLSTM.Builder().nIn(labels).nOut(lstmLayerSize).updater(Updater.RMSPROP) .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP) .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .layer(2, new RnnOutputLayer.Builder().nIn(lstmLayerSize).nOut(labels).lossFunction(LossFunction.MSE) .updater(Updater.RMSPROP).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT) .tBPTTForwardLength(bttLength).tBPTTBackwardLength(bttLength).build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); model.setListeners(new ScoreIterationListener(System.out)); for (int epoch = 0; epoch < 300; epoch++) { model.fit(iter); iter.reset(); evaluateModel(model, stackedAutoencoder, rng, epoch); ModelSerializer.writeModel(model, "stack-timeseries.rnn", true); } }
From source file:org.ensor.fftmusings.autoencoder.StackTrainer.java
public static void main(String[] args) throws IOException, Exception { MultiLayerNetwork pretrainedLayers[] = new MultiLayerNetwork[6]; pretrainedLayers[0] = ModelSerializer.restoreMultiLayerNetwork("data/daa/model-1024-1200sparse0.01.nn"); pretrainedLayers[1] = ModelSerializer.restoreMultiLayerNetwork("data/daa/model-1200-800sparse0.01.nn"); pretrainedLayers[2] = ModelSerializer.restoreMultiLayerNetwork("data/daa/model-800-400sparse0.01.nn"); pretrainedLayers[3] = ModelSerializer.restoreMultiLayerNetwork("data/daa/model-400-200sparse0.01.nn"); pretrainedLayers[4] = ModelSerializer.restoreMultiLayerNetwork("data/daa/model-200-100sparse0.01.nn"); NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .seed(System.currentTimeMillis()).iterations(1) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).weightInit(WeightInit.XAVIER) .updater(Updater.NESTEROVS).regularization(false).l1(0.000).learningRate(0.0001); //.learningRate(Double.parseDouble(args[0])); int layerNo = 0; NeuralNetConfiguration.ListBuilder listBuilder = builder.list() .layer(layerNo++, new RBM.Builder().nIn(1024).nOut(1200).activation(Activation.SIGMOID).build()) .layer(layerNo++, new RBM.Builder().nIn(1200).nOut(800).activation(Activation.SIGMOID).build()) .layer(layerNo++, new RBM.Builder().nIn(800).nOut(400).activation(Activation.SIGMOID).build()) .layer(layerNo++, new RBM.Builder().nIn(400).nOut(200).activation(Activation.SIGMOID).build()) .layer(layerNo++, new RBM.Builder().nIn(200).nOut(100).activation(Activation.SIGMOID).build()) // .layer(layerNo++, new RBM.Builder() // .nIn(100) // .nOut(50) // .activation(Activation.SIGMOID) // .build()) // .layer(layerNo++, new RBM.Builder() // .nIn(50) // .nOut(100) // .activation(Activation.SIGMOID) // .build()) .layer(layerNo++, new RBM.Builder().nIn(100).nOut(200).activation(Activation.SIGMOID).build()) .layer(layerNo++, new RBM.Builder().nIn(200).nOut(400).activation(Activation.SIGMOID).build()) .layer(layerNo++, 
new RBM.Builder().nIn(400).nOut(800).activation(Activation.SIGMOID).build()) .layer(layerNo++, new RBM.Builder().nIn(800).nOut(1200).activation(Activation.SIGMOID).build()) .layer(layerNo++,/*from w ww. j a v a 2 s. co m*/ new OutputLayer.Builder().nIn(1200).nOut(1024).activation(Activation.IDENTITY) .lossFunction(LossFunctions.LossFunction.L2).build()) .pretrain(false).backprop(true); MultiLayerConfiguration conf = listBuilder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); model.setListeners(Arrays.asList((IterationListener) new ScoreIterationListener(System.out))); for (layerNo = 0; layerNo < 5; layerNo++) { model.getLayer(layerNo).setParam(PretrainParamInitializer.BIAS_KEY, pretrainedLayers[layerNo].getLayer(0).getParam(PretrainParamInitializer.BIAS_KEY)); model.getLayer(layerNo).setParam(PretrainParamInitializer.WEIGHT_KEY, pretrainedLayers[layerNo].getLayer(0).getParam(PretrainParamInitializer.WEIGHT_KEY)); model.getLayer(model.getnLayers() - layerNo - 1).setParam(PretrainParamInitializer.BIAS_KEY, pretrainedLayers[layerNo].getLayer(1).getParam(PretrainParamInitializer.BIAS_KEY)); model.getLayer(model.getnLayers() - layerNo - 1).setParam(PretrainParamInitializer.WEIGHT_KEY, pretrainedLayers[layerNo].getLayer(1).getParam(PretrainParamInitializer.WEIGHT_KEY)); } DataSetIterator iter = new FFTDataIterator(new Random(), 100, 1250, System.out); int epoch = 0; for (int i = 0; i < 300; i++) { model.fit(iter); iter.reset(); evaluateModel(model, epoch); ModelSerializer.writeModel(model, "stack.rnn", true); epoch++; } }
From source file:org.ensor.fftmusings.rnn.GravesLSTMCharModellingExample.java
public static void main(String[] args) throws Exception { int numEpochs = 30; //Total number of training + sample generation epochs String generationInitialization = null; //Optional character initialization; a random character is used if null int nSamplesToGenerate = 4; //Number of samples to generate after each training epoch int nCharactersToSample = 300; //Length of each sample to generate Random rng = new Random(12345); int miniBatchSize = 32; //Size of mini batch to use when training int examplesPerEpoch = 50 * miniBatchSize; //i.e., how many examples to learn on between generating samples int exampleLength = 100; //Length of each training example //Get a DataSetIterator that handles vectorization of text into something we can use to train // our GravesLSTM network. CharacterIterator iter = getShakespeareIterator(miniBatchSize, exampleLength, examplesPerEpoch); File modelFilename = new File("data/shakespere/shakespere.3.rnn"); MultiLayerNetwork net = RNNFactory.create(modelFilename, iter); //Print the number of parameters in the network (and for each layer) Layer[] layers = net.getLayers();// w w w. j a v a 2 s.c o m int totalNumParams = 0; for (int i = 0; i < layers.length; i++) { int nParams = layers[i].numParams(); System.out.println("Number of parameters in layer " + i + ": " + nParams); totalNumParams += nParams; } System.out.println("Total number of network parameters: " + totalNumParams); //Do training, and then generate and print samples from network for (int i = 0; i < numEpochs; i++) { net.fit(iter); System.out.println("--------------------"); System.out.println("Completed epoch " + i); System.out.println("Sampling characters from network given initialization \"" + (generationInitialization == null ? 
"" : generationInitialization) + "\""); for (int j = 0; j < nSamplesToGenerate; j++) { String samples = sampleCharactersFromNetwork2(generationInitialization, net, iter, rng, nCharactersToSample); System.out.println("----- Sample " + j + " -----"); System.out.println(samples); System.out.println(); } RNNFactory.persist(modelFilename, net); iter.reset(); //Reset iterator for another epoch } System.out.println("\n\nExample complete"); }