List of usage examples for org.deeplearning4j.nn.conf NeuralNetConfiguration.Builder miniBatch
boolean miniBatch
To view the source code for org.deeplearning4j.nn.conf NeuralNetConfiguration.Builder miniBatch, click the Source Link.
From source file: aiLogicImplementation.RNNBasic.java
License: Apache License
public void create(StatsStorage statsStorage) { NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder(); builder.iterations(1000);/* ww w . j a v a 2 s . com*/ builder.learningRate(0.01); builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); builder.seed(123); builder.biasInit(0.2); builder.miniBatch(true); // builder.updater(Updater.NESTEROVS); builder.updater(new Nesterovs(0.01)); builder.weightInit(WeightInit.XAVIER); builder.regularization(true); builder.l2(0.001); // builder.gradientNormalization(GradientNormalization.ClipL2PerParamType); // builder.gradientNormalizationThreshold(0.5); ListBuilder listBuilder = builder.list(); for (int i = 0; i < HIDDEN_LAYER_CONT; i++) { GravesLSTM.Builder hiddenLayerBuilder = new GravesLSTM.Builder(); hiddenLayerBuilder.nIn(i == 0 ? NUMBER_OF_FEATURE_INPUT : HIDDEN_LAYER_WIDTH); hiddenLayerBuilder.nOut(HIDDEN_LAYER_WIDTH); if (i == 0) { hiddenLayerBuilder.activation(Activation.TANH); } else { hiddenLayerBuilder.activation(Activation.TANH); } listBuilder.layer(i, hiddenLayerBuilder.build()); } // RnnOutputLayer.Builder outputLayerBuilder = new RnnOutputLayer.Builder(LossFunction.XENT); RnnOutputLayer.Builder outputLayerBuilder = new RnnOutputLayer.Builder(LossFunction.L2); // RnnOutputLayer.Builder outputLayerBuilder = new RnnOutputLayer.Builder(LossFunction.MCXENT); // outputLayerBuilder.activation(Activation.SIGMOID); outputLayerBuilder.activation(Activation.RELU); // outputLayerBuilder.activation(Activation.SOFTMAX); outputLayerBuilder.nIn(HIDDEN_LAYER_WIDTH); outputLayerBuilder.nOut(NUMBER_OF_FEATURE_OUTPUT); outputLayerBuilder.weightInit(WeightInit.XAVIER); // outputLayerBuilder.dist(new UniformDistribution(0, 1)); listBuilder.layer(HIDDEN_LAYER_CONT, outputLayerBuilder.build()); // listBuilder.backpropType(BackpropType.TruncatedBPTT); // listBuilder.tBPTTForwardLength(tbpttLength); // listBuilder.tBPTTBackwardLength(tbpttLength); listBuilder.pretrain(false); 
listBuilder.backprop(true); // create network MultiLayerConfiguration conf = listBuilder.build(); net = new MultiLayerNetwork(conf); net.init(); // net.setListeners(new ScoreIterationListener(configuration.getListenerFrequency())); //Then add the StatsListener to collect this information from the network, as it trains List list = new ArrayList(); list.add(new StatsListener(statsStorage, configuration.getListenerFrequency())); list.add(new ScoreIterationListener(configuration.getListenerFrequency())); net.setListeners(list); /* * CREATE OUR TRAINING DATA */ input = Nd4j.zeros(maze.length * maze.length * hibertMaze.getHibertMazeGUI().getAlPlayers().size() * hibertMaze.getHibertMazeGUI().getAlFoods().size(), NUMBER_OF_FEATURE_INPUT); labels = Nd4j.zeros(maze.length * maze.length * hibertMaze.getHibertMazeGUI().getAlPlayers().size() * hibertMaze.getHibertMazeGUI().getAlFoods().size(), NUMBER_OF_FEATURE_OUTPUT); positionMoving = new PositionMoving(maze); int counter = 0; for (int i = 0; i < maze.length; i++) { for (int j = 0; j < maze[i].length; j++) { int mazeFieldValue = maze[i][j]; // input neuron for current-char is 1 at "samplePos" for (Player player : hibertMaze.getHibertMazeGUI().getAlPlayers()) { for (Food food : hibertMaze.getHibertMazeGUI().getAlFoods()) { Position position = new Position(i, j); input.putScalar(counter, 0, i); input.putScalar(counter, 1, j); input.putScalar(counter, 2, mazeFieldValue); input.putScalar(counter, 3, player.getPlayerNumber()); input.putScalar(counter, 4, i); input.putScalar(counter, 5, j); input.putScalar(counter, 6, food.getFoodNumber()); input.putScalar(counter, 7, food.getPosition().getX()); input.putScalar(counter, 8, food.getPosition().getY()); input.putScalar(counter, 9, positionMoving.moveNorth(position) ? 1.00d : 0.00d); input.putScalar(counter, 10, positionMoving.moveSouth(position) ? 1.00d : 0.00d); input.putScalar(counter, 11, positionMoving.moveEast(position) ? 
1.00d : 0.00d); input.putScalar(counter, 12, positionMoving.moveWest(position) ? 1.00d : 0.00d); input.putScalar(counter, 13, player.getPosition().getX() == food.getPosition().getX() && player.getPosition().getY() == food.getPosition().getY() ? 1.00d : 0.00d); labels.putScalar(counter, 0, positionMoving.moveNorth(position) ? 1.00d : 0.00d); labels.putScalar(counter, 1, positionMoving.moveSouth(position) ? 1.00d : 0.00d); labels.putScalar(counter, 2, positionMoving.moveEast(position) ? 1.00d : 0.00d); labels.putScalar(counter, 3, positionMoving.moveWest(position) ? 1.00d : 0.00d); counter = counter + 1; } } } } trainingData = new DataSet(input, labels); }
From source file: com.javafxpert.neuralnetviz.scenario.BasicRNNExample.java
License: Apache License
public static MultiLayerNetworkEnhanced buildNetwork(WebSocketSession webSocketSession) throws Exception { //public static void main(String[] args) { // create a dedicated list of possible chars in LEARNSTRING_CHARS_LIST LinkedHashSet<Character> LEARNSTRING_CHARS = new LinkedHashSet<Character>(); for (char c : LEARNSTRING) LEARNSTRING_CHARS.add(c);/*from ww w . j av a 2 s . c o m*/ LEARNSTRING_CHARS_LIST.addAll(LEARNSTRING_CHARS); System.out.println("LEARNSTRING_CHARS_LIST: " + LEARNSTRING_CHARS_LIST); // some common parameters NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder(); builder.iterations(10); builder.learningRate(0.001); builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); builder.seed(123); builder.biasInit(0); builder.miniBatch(false); builder.updater(Updater.RMSPROP); builder.weightInit(WeightInit.XAVIER); ListBuilder listBuilder = builder.list(); // first difference, for rnns we need to use GravesLSTM.Builder for (int i = 0; i < HIDDEN_LAYER_CONT; i++) { GravesLSTM.Builder hiddenLayerBuilder = new GravesLSTM.Builder(); hiddenLayerBuilder.nIn(i == 0 ? 
LEARNSTRING_CHARS.size() : HIDDEN_LAYER_WIDTH); hiddenLayerBuilder.nOut(HIDDEN_LAYER_WIDTH); // adopted activation function from GravesLSTMCharModellingExample // seems to work well with RNNs hiddenLayerBuilder.activation("tanh"); listBuilder.layer(i, hiddenLayerBuilder.build()); } // we need to use RnnOutputLayer for our RNN RnnOutputLayer.Builder outputLayerBuilder = new RnnOutputLayer.Builder(LossFunction.MCXENT); // softmax normalizes the output neurons, the sum of all outputs is 1 // this is required for our sampleFromDistribution-function outputLayerBuilder.activation("softmax"); outputLayerBuilder.nIn(HIDDEN_LAYER_WIDTH); outputLayerBuilder.nOut(LEARNSTRING_CHARS.size()); listBuilder.layer(HIDDEN_LAYER_CONT, outputLayerBuilder.build()); // finish builder listBuilder.pretrain(false); listBuilder.backprop(true); listBuilder.build(); // create network MultiLayerConfiguration conf = listBuilder.build(); String[] inputFeatureNames = {}; String[] outputLabelNames = {}; MultiLayerNetworkEnhanced net = new MultiLayerNetworkEnhanced(conf, inputFeatureNames, outputLabelNames); net.init(); //net.setListeners(new ScoreIterationListener(1)); net.setListeners(new ModelListener(100, webSocketSession)); /* * CREATE OUR TRAINING DATA */ // create input and output arrays: SAMPLE_INDEX, INPUT_NEURON, // SEQUENCE_POSITION INDArray input = Nd4j.zeros(1, LEARNSTRING_CHARS_LIST.size(), LEARNSTRING.length); INDArray labels = Nd4j.zeros(1, LEARNSTRING_CHARS_LIST.size(), LEARNSTRING.length); // loop through our sample-sentence int samplePos = 0; for (char currentChar : LEARNSTRING) { // small hack: when currentChar is the last, take the first char as // nextChar - not really required char nextChar = LEARNSTRING[(samplePos + 1) % (LEARNSTRING.length)]; // input neuron for current-char is 1 at "samplePos" input.putScalar(new int[] { 0, LEARNSTRING_CHARS_LIST.indexOf(currentChar), samplePos }, 1); // output neuron for next-char is 1 at "samplePos" labels.putScalar(new int[] { 0, 
LEARNSTRING_CHARS_LIST.indexOf(nextChar), samplePos }, 1); samplePos++; } DataSet trainingData = new DataSet(input, labels); // some epochs for (int epoch = 0; epoch < 100; epoch++) { System.out.println("Epoch " + epoch); // train the data net.fit(trainingData); // clear current stance from the last example net.rnnClearPreviousState(); // put the first caracter into the rrn as an initialisation INDArray testInit = Nd4j.zeros(LEARNSTRING_CHARS_LIST.size()); testInit.putScalar(LEARNSTRING_CHARS_LIST.indexOf(LEARNSTRING[0]), 1); // run one step -> IMPORTANT: rnnTimeStep() must be called, not // output() // the output shows what the net thinks what should come next INDArray output = net.rnnTimeStep(testInit); // now the net sould guess LEARNSTRING.length mor characters for (int j = 0; j < LEARNSTRING.length; j++) { // first process the last output of the network to a concrete // neuron, the neuron with the highest output cas the highest // cance to get chosen double[] outputProbDistribution = new double[LEARNSTRING_CHARS.size()]; for (int k = 0; k < outputProbDistribution.length; k++) { outputProbDistribution[k] = output.getDouble(k); } int sampledCharacterIdx = findIndexOfHighestValue(outputProbDistribution); // print the chosen output System.out.print(LEARNSTRING_CHARS_LIST.get(sampledCharacterIdx)); // use the last output as input INDArray nextInput = Nd4j.zeros(LEARNSTRING_CHARS_LIST.size()); nextInput.putScalar(sampledCharacterIdx, 1); output = net.rnnTimeStep(nextInput); } System.out.print("\n"); } return net; }
From source file: com.javafxpert.neuralnetviz.scenario.XorExample.java
License: Apache License
public static MultiLayerNetworkEnhanced buildNetwork(WebSocketSession webSocketSession) throws Exception { //public static void main(String[] args) throws Exception { //System.out.println("In XorExample.go()"); // list off input values, 4 training samples with data for 2 // input-neurons each INDArray input = Nd4j.zeros(4, 2);// www . j a v a 2 s.c o m //System.out.println("After INDArray input: " + input); // correspondending list with expected output values, 4 training samples // with data for 2 output-neurons each INDArray labels = Nd4j.zeros(4, 2); // create first dataset // when first input=0 and second input=0 input.putScalar(new int[] { 0, 0 }, 0); input.putScalar(new int[] { 0, 1 }, 0); // then the first output fires for false, and the second is 0 (see class // comment) labels.putScalar(new int[] { 0, 0 }, 1); labels.putScalar(new int[] { 0, 1 }, 0); // when first input=1 and second input=0 input.putScalar(new int[] { 1, 0 }, 1); input.putScalar(new int[] { 1, 1 }, 0); // then xor is true, therefore the second output neuron fires labels.putScalar(new int[] { 1, 0 }, 0); labels.putScalar(new int[] { 1, 1 }, 1); // same as above input.putScalar(new int[] { 2, 0 }, 0); input.putScalar(new int[] { 2, 1 }, 1); labels.putScalar(new int[] { 2, 0 }, 0); labels.putScalar(new int[] { 2, 1 }, 1); // when both inputs fire, xor is false again - the first output should // fire input.putScalar(new int[] { 3, 0 }, 1); input.putScalar(new int[] { 3, 1 }, 1); labels.putScalar(new int[] { 3, 0 }, 1); labels.putScalar(new int[] { 3, 1 }, 0); //System.out.println("Before DataSet ds"); // create dataset object DataSet ds = new DataSet(input, labels); //System.out.println("After DataSet ds: " + ds); // Set up network configuration NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder(); // how often should the training set be run, we need something above // 1000, or a higher learning-rate - found this values just by trial and // error 
builder.iterations(10000); // learning rate builder.learningRate(0.1); // fixed seed for the random generator, so any run of this program // brings the same results - may not work if you do something like // ds.shuffle() builder.seed(123); // not applicable, this network is to small - but for bigger networks it // can help that the network will not only recite the training data builder.useDropConnect(false); // a standard algorithm for moving on the error-plane, this one works // best for me, LINE_GRADIENT_DESCENT or CONJUGATE_GRADIENT can do the // job, too - it's an empirical value which one matches best to // your problem builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); // init the bias with 0 - empirical value, too builder.biasInit(0); // from "http://deeplearning4j.org/architecture": The networks can // process the input more quickly and more accurately by ingesting // minibatches 5-10 elements at a time in parallel. // this example runs better without, because the dataset is smaller than // the mini batch size builder.miniBatch(false); // create a multilayer network with 2 layers (including the output // layer, excluding the input payer) ListBuilder listBuilder = builder.list(); DenseLayer.Builder hiddenLayerBuilder = new DenseLayer.Builder(); // two input connections - simultaneously defines the number of input // neurons, because it's the first non-input-layer hiddenLayerBuilder.nIn(2); // number of outgooing connections, nOut simultaneously defines the // number of neurons in this layer hiddenLayerBuilder.nOut(2); // put the output through the sigmoid function, to cap the output // valuebetween 0 and 1 hiddenLayerBuilder.activation("sigmoid"); // random initialize weights with values between 0 and 1 hiddenLayerBuilder.weightInit(WeightInit.DISTRIBUTION); hiddenLayerBuilder.dist(new UniformDistribution(0, 1)); // build and set as layer 0 listBuilder.layer(0, hiddenLayerBuilder.build()); // MCXENT or NEGATIVELOGLIKELIHOOD work ok 
for this example - this // function calculates the error-value // From homepage: Your net's purpose will determine the loss funtion you // use. For pretraining, choose reconstruction entropy. For // classification, use multiclass cross entropy. Builder outputLayerBuilder = new Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD); // must be the same amout as neurons in the layer before outputLayerBuilder.nIn(2); // two neurons in this layer outputLayerBuilder.nOut(2); outputLayerBuilder.activation("sigmoid"); outputLayerBuilder.weightInit(WeightInit.DISTRIBUTION); outputLayerBuilder.dist(new UniformDistribution(0, 1)); listBuilder.layer(1, outputLayerBuilder.build()); // no pretrain phase for this network listBuilder.pretrain(false); // seems to be mandatory // according to agibsonccc: You typically only use that with // pretrain(true) when you want to do pretrain/finetune without changing // the previous layers finetuned weights that's for autoencoders and // rbms listBuilder.backprop(true); // build and init the network, will check if everything is configured // correct MultiLayerConfiguration conf = listBuilder.build(); String[] inputFeatureNames = { "true (1) or false (0)", "true (1) or false (0)" }; String[] outputLabelNames = { "false", "true" }; MultiLayerNetworkEnhanced net = new MultiLayerNetworkEnhanced(conf, inputFeatureNames, outputLabelNames); net.init(); // add an listener which outputs the error every 100 parameter updates //net.setListeners(new ScoreIterationListener(100)); net.setListeners(new ModelListener(100, webSocketSession)); // C&P from GravesLSTMCharModellingExample // Print the number of parameters in the network (and for each layer) Layer[] layers = net.getLayers(); int totalNumParams = 0; for (int i = 0; i < layers.length; i++) { int nParams = layers[i].numParams(); //System.out.println("Number of parameters in layer " + i + ": " + nParams); totalNumParams += nParams; } //System.out.println("Total number of network parameters: " + 
totalNumParams); // here the actual learning takes place net.fit(ds); // create output for every training sample INDArray output = net.output(ds.getFeatureMatrix()); //System.out.println("output: " + output); for (int i = 0; i < output.rows(); i++) { String actual = ds.getLabels().getRow(i).toString().trim(); String predicted = output.getRow(i).toString().trim(); //System.out.println("actual " + actual + " vs predicted " + predicted); } // let Evaluation prints stats how often the right output had the // highest value Evaluation eval = new Evaluation(2); eval.eval(ds.getLabels(), output); System.out.println(eval.stats()); //displayNetwork(net); // Make prediction INDArray example = Nd4j.zeros(1, 2); // create first dataset // when first input=0 and second input=0 example.putScalar(new int[] { 0, 0 }, 0); example.putScalar(new int[] { 0, 1 }, 1); int[] prediction = net.predict(example); System.out.println("prediction for 0, 1: " + prediction[0]); return net; }