Example usage for org.deeplearning4j.nn.conf NeuralNetConfiguration.Builder miniBatch

List of usage examples for org.deeplearning4j.nn.conf NeuralNetConfiguration.Builder miniBatch

Introduction

In this page you can find the example usage for org.deeplearning4j.nn.conf NeuralNetConfiguration.Builder miniBatch.

Prototype

boolean miniBatch

To view the source code for org.deeplearning4j.nn.conf NeuralNetConfiguration.Builder miniBatch, click the Source Link below.

Click Source Link

Usage

From source file:aiLogicImplementation.RNNBasic.java

License:Apache License

/**
 * Builds and initializes the maze-solving recurrent network, attaches the
 * training/statistics listeners, and constructs the training {@code DataSet}
 * from the current maze state.
 *
 * <p>One training row is generated per (maze cell, player, food) combination.
 * Input feature layout (columns 0-13): cell row, cell column, maze field value,
 * player number, cell row, cell column, food number, food x, food y, and four
 * booleans for whether a move north/south/east/west from the cell is legal,
 * plus a flag for "player is standing on the food". Label layout (columns 0-3):
 * the same four legal-move booleans.
 *
 * <p>Side effects: assigns the instance fields {@code net}, {@code input},
 * {@code labels}, {@code positionMoving}, and {@code trainingData}.
 *
 * @param statsStorage destination for the DL4J {@link StatsListener} UI data
 */
public void create(StatsStorage statsStorage) {
    // Hyper-parameters chosen empirically by the original author.
    NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder();
    builder.iterations(1000);
    builder.learningRate(0.01);
    builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
    builder.seed(123); // fixed seed so runs are reproducible
    builder.biasInit(0.2);
    builder.miniBatch(true);
    builder.updater(new Nesterovs(0.01));
    builder.weightInit(WeightInit.XAVIER);
    builder.regularization(true);
    builder.l2(0.001);

    ListBuilder listBuilder = builder.list();

    // Hidden LSTM stack: only the first layer's nIn differs, since it
    // consumes the raw feature vector instead of the previous layer's output.
    for (int i = 0; i < HIDDEN_LAYER_CONT; i++) {
        GravesLSTM.Builder hiddenLayerBuilder = new GravesLSTM.Builder();
        hiddenLayerBuilder.nIn(i == 0 ? NUMBER_OF_FEATURE_INPUT : HIDDEN_LAYER_WIDTH);
        hiddenLayerBuilder.nOut(HIDDEN_LAYER_WIDTH);
        // The original code branched on i == 0 but set TANH in both branches;
        // the redundant conditional has been collapsed (behavior unchanged).
        hiddenLayerBuilder.activation(Activation.TANH);
        listBuilder.layer(i, hiddenLayerBuilder.build());
    }

    // Output layer: L2 loss with RELU activation, one neuron per move label.
    RnnOutputLayer.Builder outputLayerBuilder = new RnnOutputLayer.Builder(LossFunction.L2);
    outputLayerBuilder.activation(Activation.RELU);
    outputLayerBuilder.nIn(HIDDEN_LAYER_WIDTH);
    outputLayerBuilder.nOut(NUMBER_OF_FEATURE_OUTPUT);
    outputLayerBuilder.weightInit(WeightInit.XAVIER);
    listBuilder.layer(HIDDEN_LAYER_CONT, outputLayerBuilder.build());

    listBuilder.pretrain(false);
    listBuilder.backprop(true);

    // Create and initialize the network.
    MultiLayerConfiguration conf = listBuilder.build();
    net = new MultiLayerNetwork(conf);
    net.init();

    // Collect stats for the UI and log the score at the configured frequency.
    // NOTE(review): raw List kept to avoid pinning a listener element type
    // that varies across DL4J versions; consider List<IterationListener>.
    List list = new ArrayList();
    list.add(new StatsListener(statsStorage, configuration.getListenerFrequency()));
    list.add(new ScoreIterationListener(configuration.getListenerFrequency()));
    net.setListeners(list);

    /*
     * CREATE OUR TRAINING DATA
     */
    // One row per (cell, player, food) combination.
    input = Nd4j.zeros(maze.length * maze.length * hibertMaze.getHibertMazeGUI().getAlPlayers().size()
            * hibertMaze.getHibertMazeGUI().getAlFoods().size(), NUMBER_OF_FEATURE_INPUT);
    labels = Nd4j.zeros(maze.length * maze.length * hibertMaze.getHibertMazeGUI().getAlPlayers().size()
            * hibertMaze.getHibertMazeGUI().getAlFoods().size(), NUMBER_OF_FEATURE_OUTPUT);
    positionMoving = new PositionMoving(maze);
    int counter = 0;
    for (int i = 0; i < maze.length; i++) {
        for (int j = 0; j < maze[i].length; j++) {
            int mazeFieldValue = maze[i][j];
            for (Player player : hibertMaze.getHibertMazeGUI().getAlPlayers()) {
                for (Food food : hibertMaze.getHibertMazeGUI().getAlFoods()) {
                    Position position = new Position(i, j);
                    input.putScalar(counter, 0, i);
                    input.putScalar(counter, 1, j);
                    input.putScalar(counter, 2, mazeFieldValue);
                    input.putScalar(counter, 3, player.getPlayerNumber());
                    // NOTE(review): columns 4 and 5 repeat the cell coordinates
                    // (i, j); they may have been intended to hold the player's
                    // position instead — confirm against the feature spec.
                    input.putScalar(counter, 4, i);
                    input.putScalar(counter, 5, j);
                    input.putScalar(counter, 6, food.getFoodNumber());
                    input.putScalar(counter, 7, food.getPosition().getX());
                    input.putScalar(counter, 8, food.getPosition().getY());
                    input.putScalar(counter, 9, positionMoving.moveNorth(position) ? 1.00d : 0.00d);
                    input.putScalar(counter, 10, positionMoving.moveSouth(position) ? 1.00d : 0.00d);
                    input.putScalar(counter, 11, positionMoving.moveEast(position) ? 1.00d : 0.00d);
                    input.putScalar(counter, 12, positionMoving.moveWest(position) ? 1.00d : 0.00d);
                    // 1.0 when the player currently stands on this food.
                    input.putScalar(counter, 13,
                            player.getPosition().getX() == food.getPosition().getX()
                                    && player.getPosition().getY() == food.getPosition().getY() ? 1.00d
                                            : 0.00d);

                    // Labels mirror the four legal-move flags.
                    labels.putScalar(counter, 0, positionMoving.moveNorth(position) ? 1.00d : 0.00d);
                    labels.putScalar(counter, 1, positionMoving.moveSouth(position) ? 1.00d : 0.00d);
                    labels.putScalar(counter, 2, positionMoving.moveEast(position) ? 1.00d : 0.00d);
                    labels.putScalar(counter, 3, positionMoving.moveWest(position) ? 1.00d : 0.00d);
                    counter++;
                }
            }
        }
    }
    trainingData = new DataSet(input, labels);
}

From source file:com.javafxpert.neuralnetviz.scenario.BasicRNNExample.java

License:Apache License

/**
 * Builds and trains a character-level RNN that learns to reproduce
 * {@code LEARNSTRING}: for each character of the training string, the label is
 * the following character (wrapping at the end). After each training epoch the
 * network is sampled greedily and the generated sequence is printed.
 *
 * @param webSocketSession session used by {@link ModelListener} to stream
 *                         training progress to the browser
 * @return the trained network
 * @throws Exception propagated from network construction/training
 */
public static MultiLayerNetworkEnhanced buildNetwork(WebSocketSession webSocketSession) throws Exception {

    // Collect the distinct characters of the training string, preserving
    // first-occurrence order so each character maps to a stable index.
    LinkedHashSet<Character> LEARNSTRING_CHARS = new LinkedHashSet<Character>();
    for (char c : LEARNSTRING)
        LEARNSTRING_CHARS.add(c);
    LEARNSTRING_CHARS_LIST.addAll(LEARNSTRING_CHARS);

    System.out.println("LEARNSTRING_CHARS_LIST: " + LEARNSTRING_CHARS_LIST);

    // Common network parameters.
    NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder();
    builder.iterations(10);
    builder.learningRate(0.001);
    builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
    builder.seed(123); // fixed seed for reproducible runs
    builder.biasInit(0);
    builder.miniBatch(false); // single training example, so no mini-batching
    builder.updater(Updater.RMSPROP);
    builder.weightInit(WeightInit.XAVIER);

    ListBuilder listBuilder = builder.list();

    // Hidden layers: for RNNs we use GravesLSTM.Builder. Only the first
    // layer's nIn differs (it consumes the one-hot character encoding).
    for (int i = 0; i < HIDDEN_LAYER_CONT; i++) {
        GravesLSTM.Builder hiddenLayerBuilder = new GravesLSTM.Builder();
        hiddenLayerBuilder.nIn(i == 0 ? LEARNSTRING_CHARS.size() : HIDDEN_LAYER_WIDTH);
        hiddenLayerBuilder.nOut(HIDDEN_LAYER_WIDTH);
        // tanh adopted from GravesLSTMCharModellingExample; works well for RNNs.
        hiddenLayerBuilder.activation("tanh");
        listBuilder.layer(i, hiddenLayerBuilder.build());
    }

    // RnnOutputLayer with softmax so the outputs form a probability
    // distribution over the next character (sum of all outputs is 1).
    RnnOutputLayer.Builder outputLayerBuilder = new RnnOutputLayer.Builder(LossFunction.MCXENT);
    outputLayerBuilder.activation("softmax");
    outputLayerBuilder.nIn(HIDDEN_LAYER_WIDTH);
    outputLayerBuilder.nOut(LEARNSTRING_CHARS.size());
    listBuilder.layer(HIDDEN_LAYER_CONT, outputLayerBuilder.build());

    // Finish the configuration. (A redundant extra listBuilder.build() call,
    // whose result was discarded, has been removed.)
    listBuilder.pretrain(false);
    listBuilder.backprop(true);

    // Create and initialize the network.
    MultiLayerConfiguration conf = listBuilder.build();

    String[] inputFeatureNames = {};
    String[] outputLabelNames = {};
    MultiLayerNetworkEnhanced net = new MultiLayerNetworkEnhanced(conf, inputFeatureNames, outputLabelNames);
    net.init();
    net.setListeners(new ModelListener(100, webSocketSession));

    /*
     * CREATE OUR TRAINING DATA
     */
    // Arrays are indexed [SAMPLE_INDEX, INPUT_NEURON, SEQUENCE_POSITION];
    // one sample, one-hot character encoding, one step per character.
    INDArray input = Nd4j.zeros(1, LEARNSTRING_CHARS_LIST.size(), LEARNSTRING.length);
    INDArray labels = Nd4j.zeros(1, LEARNSTRING_CHARS_LIST.size(), LEARNSTRING.length);
    int samplePos = 0;
    for (char currentChar : LEARNSTRING) {
        // Small hack: for the last character, wrap around and use the first
        // character as its "next" character.
        char nextChar = LEARNSTRING[(samplePos + 1) % (LEARNSTRING.length)];
        // Input neuron for the current char is 1 at this sequence position.
        input.putScalar(new int[] { 0, LEARNSTRING_CHARS_LIST.indexOf(currentChar), samplePos }, 1);
        // Output neuron for the next char is 1 at this sequence position.
        labels.putScalar(new int[] { 0, LEARNSTRING_CHARS_LIST.indexOf(nextChar), samplePos }, 1);
        samplePos++;
    }
    DataSet trainingData = new DataSet(input, labels);

    // Train for a fixed number of epochs, sampling the network after each.
    for (int epoch = 0; epoch < 100; epoch++) {

        System.out.println("Epoch " + epoch);

        // Train on the single sequence.
        net.fit(trainingData);

        // Clear the recurrent state left over from the previous sample run.
        net.rnnClearPreviousState();

        // Prime the RNN with the first character of the training string.
        INDArray testInit = Nd4j.zeros(LEARNSTRING_CHARS_LIST.size());
        testInit.putScalar(LEARNSTRING_CHARS_LIST.indexOf(LEARNSTRING[0]), 1);

        // IMPORTANT: rnnTimeStep() must be used here (not output()) so the
        // recurrent state carries across steps; the output is the network's
        // prediction for the next character.
        INDArray output = net.rnnTimeStep(testInit);

        // Greedily generate LEARNSTRING.length more characters.
        for (int j = 0; j < LEARNSTRING.length; j++) {

            // Convert the network output into a concrete character index by
            // picking the neuron with the highest activation.
            double[] outputProbDistribution = new double[LEARNSTRING_CHARS.size()];
            for (int k = 0; k < outputProbDistribution.length; k++) {
                outputProbDistribution[k] = output.getDouble(k);
            }
            int sampledCharacterIdx = findIndexOfHighestValue(outputProbDistribution);

            // Print the chosen character.
            System.out.print(LEARNSTRING_CHARS_LIST.get(sampledCharacterIdx));

            // Feed the chosen character back in as the next input.
            INDArray nextInput = Nd4j.zeros(LEARNSTRING_CHARS_LIST.size());
            nextInput.putScalar(sampledCharacterIdx, 1);
            output = net.rnnTimeStep(nextInput);

        }
        System.out.print("\n");

    }
    return net;
}

From source file:com.javafxpert.neuralnetviz.scenario.XorExample.java

License:Apache License

/**
 * Builds and trains a tiny two-layer network that learns the XOR function
 * from its four-row truth table, prints an evaluation of the trained model,
 * and demonstrates a single prediction for the input (0, 1).
 *
 * <p>Output encoding: neuron 0 fires for XOR = false, neuron 1 for XOR = true.
 *
 * @param webSocketSession session used by {@link ModelListener} to stream
 *                         training progress to the browser
 * @return the trained network
 * @throws Exception propagated from network construction/training
 */
public static MultiLayerNetworkEnhanced buildNetwork(WebSocketSession webSocketSession) throws Exception {

    // Input values: 4 training samples with data for 2 input neurons each.
    INDArray input = Nd4j.zeros(4, 2);

    // Corresponding expected outputs: 4 training samples with data for
    // 2 output neurons each.
    INDArray labels = Nd4j.zeros(4, 2);

    // Sample 0: inputs 0,0 -> XOR is false, so the first output neuron fires.
    input.putScalar(new int[] { 0, 0 }, 0);
    input.putScalar(new int[] { 0, 1 }, 0);
    labels.putScalar(new int[] { 0, 0 }, 1);
    labels.putScalar(new int[] { 0, 1 }, 0);

    // Sample 1: inputs 1,0 -> XOR is true, so the second output neuron fires.
    input.putScalar(new int[] { 1, 0 }, 1);
    input.putScalar(new int[] { 1, 1 }, 0);
    labels.putScalar(new int[] { 1, 0 }, 0);
    labels.putScalar(new int[] { 1, 1 }, 1);

    // Sample 2: inputs 0,1 -> XOR is true.
    input.putScalar(new int[] { 2, 0 }, 0);
    input.putScalar(new int[] { 2, 1 }, 1);
    labels.putScalar(new int[] { 2, 0 }, 0);
    labels.putScalar(new int[] { 2, 1 }, 1);

    // Sample 3: inputs 1,1 -> XOR is false again; the first output fires.
    input.putScalar(new int[] { 3, 0 }, 1);
    input.putScalar(new int[] { 3, 1 }, 1);
    labels.putScalar(new int[] { 3, 0 }, 1);
    labels.putScalar(new int[] { 3, 1 }, 0);

    // Create the dataset object.
    DataSet ds = new DataSet(input, labels);

    // Set up the network configuration.
    NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder();
    // How often the training set is run; we need something above 1000 or a
    // higher learning rate — values found by trial and error.
    builder.iterations(10000);
    builder.learningRate(0.1);
    // Fixed seed so every run of this program brings the same results
    // (may not hold if you do something like ds.shuffle()).
    builder.seed(123);
    // Not applicable for a network this small, but for bigger networks drop
    // connect can help prevent memorizing the training data.
    builder.useDropConnect(false);
    // A standard algorithm for moving on the error plane; LINE_GRADIENT_DESCENT
    // or CONJUGATE_GRADIENT can do the job too — it's an empirical choice.
    builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
    // Initialize the bias with 0 — an empirical value, too.
    builder.biasInit(0);
    // Mini-batching is disabled because the dataset (4 rows) is smaller than
    // a typical mini batch size.
    builder.miniBatch(false);

    // Multilayer network with 2 layers (including the output layer,
    // excluding the input layer).
    ListBuilder listBuilder = builder.list();

    DenseLayer.Builder hiddenLayerBuilder = new DenseLayer.Builder();
    // Two input connections — this also defines the number of input neurons,
    // because it's the first non-input layer.
    hiddenLayerBuilder.nIn(2);
    // Number of outgoing connections; nOut also defines the number of
    // neurons in this layer.
    hiddenLayerBuilder.nOut(2);
    // Sigmoid caps each output value between 0 and 1.
    hiddenLayerBuilder.activation("sigmoid");
    // Randomly initialize weights with values between 0 and 1.
    hiddenLayerBuilder.weightInit(WeightInit.DISTRIBUTION);
    hiddenLayerBuilder.dist(new UniformDistribution(0, 1));

    // Build and set as layer 0.
    listBuilder.layer(0, hiddenLayerBuilder.build());

    // The loss function calculates the error value; MCXENT or
    // NEGATIVELOGLIKELIHOOD both work for this classification example.
    Builder outputLayerBuilder = new Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD);
    // nIn must match the number of neurons in the preceding layer.
    outputLayerBuilder.nIn(2);
    // Two output neurons: index 0 = "false", index 1 = "true".
    outputLayerBuilder.nOut(2);
    outputLayerBuilder.activation("sigmoid");
    outputLayerBuilder.weightInit(WeightInit.DISTRIBUTION);
    outputLayerBuilder.dist(new UniformDistribution(0, 1));
    listBuilder.layer(1, outputLayerBuilder.build());

    // No pretrain phase for this network.
    listBuilder.pretrain(false);

    // Backprop is required here; pretrain(true) without backprop is for
    // autoencoders and RBMs where earlier fine-tuned weights must be kept.
    listBuilder.backprop(true);

    // Build the configuration; init() checks that everything is configured
    // correctly.
    MultiLayerConfiguration conf = listBuilder.build();

    String[] inputFeatureNames = { "true (1) or false (0)", "true (1) or false (0)" };
    String[] outputLabelNames = { "false", "true" };
    MultiLayerNetworkEnhanced net = new MultiLayerNetworkEnhanced(conf, inputFeatureNames, outputLabelNames);
    net.init();

    // Listener reports progress every 100 parameter updates.
    net.setListeners(new ModelListener(100, webSocketSession));

    // (Dead code removed: a parameter-count loop and a per-row actual-vs-
    // predicted loop whose results were only used by commented-out printlns.)

    // Here the actual learning takes place.
    net.fit(ds);

    // Create output for every training sample.
    INDArray output = net.output(ds.getFeatureMatrix());

    // Evaluation prints stats on how often the right output neuron had the
    // highest value.
    Evaluation eval = new Evaluation(2);
    eval.eval(ds.getLabels(), output);
    System.out.println(eval.stats());

    // Make a prediction for first input = 0, second input = 1.
    INDArray example = Nd4j.zeros(1, 2);
    example.putScalar(new int[] { 0, 0 }, 0);
    example.putScalar(new int[] { 0, 1 }, 1);

    int[] prediction = net.predict(example);

    System.out.println("prediction for 0, 1: " + prediction[0]);

    return net;
}