Example usage for org.deeplearning4j.nn.conf Updater ADAM

List of usage examples for org.deeplearning4j.nn.conf Updater ADAM

Introduction

On this page you can find example usages of org.deeplearning4j.nn.conf Updater ADAM.

Prototype

Updater ADAM

To view the source code for org.deeplearning4j.nn.conf Updater ADAM, click the Source Link.

Usage

From source file:gr.aueb.cs.nlp.computationgraphs.GraphConfigurations.java

License:Open Source License

/**
 * A 5 layer MLP that tries to guess the category of each example based on its features
 * @param totalCategories/*from  w  w w . j ava2  s  . co m*/
 * @param trainSet
 * @param testSet
 * @return The configuration that provides this model
 */
public static ComputationGraphConfiguration DeepMLPGraph(List<Word> trainSet) {
    int inputFeatures = trainSet.get(0).getFeatureVec().getValues().length;
    int outputLabels = trainSet.get(0).getFeatureVec().getLabels().length;

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.01)
            .regularization(true) //else it won't use regularizers
            .graphBuilder().addInputs("input") //can use any label for this, it is just an identifier for the graph.
            .addLayer("L1", new DenseLayer.Builder().nIn(inputFeatures) // always current nIn = Sum(nOut) of layer inputs
                    .nOut(300) //you have as many neurons as your outputs...
                    .biasLearningRate(0.2) //faster changing bias int he first layer then slower
                    .activation(Activation.RELU) // the enumerations are made by me, 
                    //to help me know which activations are available,
                    //for me relu work so often but they need more neurons per layer than others
                    .l1(0.3) //l1 regularization
                    .l2(0.02) //l2 regularization
                    .dropOut(0.3) //regularization via freezing whole neurons on a feedforward phase, 
                    //this works usual/y better than l1,l2 for me
                    .updater(Updater.NESTEROVS) //how the weights are updated. nesterovs uses the momentum
                    .momentum(0.3) //the higher the easier to escape a saddle point or miss an optimum
                    .build(), "input")//input here, means which layer is the input to this layes. 
            //so the layer with identifier "input" is the input for "L1" 
            .addLayer("L2", new DenseLayer.Builder().nIn(300) // an autoencoder for some feature extraction
                    .nOut(250).biasLearningRate(0.02).l1(0.3).l2(0.02).activation(Activation.RELU).dropOut(0.3)
                    .updater(Updater.ADAM)//how to use ADAM, read the ADAM paper 
                    //to understand better what they do https://arxiv.org/pdf/1412.6980.pdf
                    //usually cheaper training and better bias correction...
                    .adamMeanDecay(0.2).adamMeanDecay(0.2).build(), "L1")
            .addLayer("L3", new DenseLayer.Builder().nIn(250) // an autoencoder for some feature extraction
                    .nOut(300).biasLearningRate(0.02).activation(Activation.RELU).l1(0.1).l2(0.1).dropOut(0.3)
                    .build(), "L2")
            .addLayer("L4", new DenseLayer.Builder().nIn(300) // an autoencoder for some feature extraction
                    .nOut(250).biasLearningRate(0.03).l1(0.1).l2(0.1).activation(Activation.RELU).dropOut(0.3)
                    .build(), "L3")
            .addLayer("L5",
                    new OutputLayer.Builder().nIn(250).nOut(outputLabels).lossFunction(LossFunction.MCXENT) //categorical cross entropy, when building the output layer be very careful
                            //about pairing the right loss function with the appropriate activation
                            //e.g. why can't sigmoid work with hinge loss? cause [0,1] != [-1,1]
                            .activation(Activation.SOFTMAX) //softmax goes with categorical corss entropy
                            .build(),
                    "L4")
            .setOutputs("L5") //We need to specify the network outputs and their order
            .build();
    return conf;
}

From source file:gr.aueb.cs.nlp.computationgraphs.GraphConfigurations.java

License:Open Source License

/**
 * Builds an example LSTM graph: two stacked {@code GravesLSTM} layers followed by an
 * {@code RnnOutputLayer}.
 *
 * <p>The input width and the number of output labels are both read from the first
 * element of {@code trainSet}.
 *
 * @param trainSet training examples; must contain at least one element
 * @return the computation graph configuration for this model
 * @throws IllegalArgumentException if {@code trainSet} is {@code null} or empty
 */
public static ComputationGraphConfiguration LSTMGraph(List<Word> trainSet) {
    if (trainSet == null || trainSet.isEmpty()) {
        throw new IllegalArgumentException("trainSet must contain at least one example");
    }
    // Layer sizes at both ends of the network come from the first example.
    int inputFeatures = trainSet.get(0).getFeatureVec().getValues().length;
    int outputLabels = trainSet.get(0).getFeatureVec().getLabels().length;

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.01)
            .regularization(true).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .graphBuilder()

            .addInputs("input") // any label works here, it is just an identifier for the graph
            .addLayer("L1", new GravesLSTM.Builder().nIn(inputFeatures) // always: current nIn = sum(nOut) of the layer's inputs
                    .nOut(150)
                    .biasLearningRate(0.2) // faster changing bias in the first layer, slower in the later ones
                    .activation(Activation.TANH)
                    .l1(0.3) // l1 regularization
                    .l2(0.02) // l2 regularization
                    .dropOut(0.3) // dropout as an additional regularizer
                    .updater(Updater.ADADELTA) // how the weights are updated
                    // NOTE(review): momentum is set although the updater is ADADELTA, not
                    // NESTEROVS; presumably it has no effect here - confirm before relying on it.
                    .momentum(0.3)
                    .build(), "input") // the trailing name says which vertex feeds this layer:
            // here the vertex with identifier "input" is the input for "L1"
            .addLayer("L2", new GravesLSTM.Builder().nIn(150) // second recurrent layer, 150 -> 200
                    .nOut(200).biasLearningRate(0.02).l1(0.3).l2(0.02).activation(Activation.RELU).dropOut(0.3)
                    .updater(Updater.ADAM) // for background on ADAM see https://arxiv.org/pdf/1412.6980.pdf
                    // NOTE(review): only the mean decay is configured; if the variance decay
                    // was also meant to be tuned, set it explicitly - confirm with the author.
                    .adamMeanDecay(0.2).build(), "L1")
            // Output layer is built with the library defaults for loss function and activation.
            .addLayer("L3", new RnnOutputLayer.Builder().nIn(200).nOut(outputLabels).build(), "L2")
            .setOutputs("L3") // we need to specify the network outputs and their order
            .build();
    return conf;
}

From source file:org.wso2.carbon.ml.rest.api.neuralNetworks.FeedForwardNetwork.java

License:Open Source License

/**
 * Translates the user-selected updater algorithm name into the matching {@code Updater}
 * constant.
 *
 * <p>The name is compared exactly (case-sensitive) against the lower-case keys
 * {@code "sgd"}, {@code "adam"}, {@code "adadelta"}, {@code "nesterovs"},
 * {@code "adagrad"}, {@code "rmsprop"}, {@code "none"} and {@code "custom"}.
 *
 * @param updater name of the updater algorithm; a {@code null} value makes the
 *                {@code switch} throw a {@code NullPointerException}
 * @return the corresponding {@code Updater}, or {@code null} if the name is not one of
 *         the keys listed above
 */
Updater mapUpdater(String updater) {
    switch (updater) {
    case "sgd":
        return Updater.SGD;
    case "adam":
        return Updater.ADAM;
    case "adadelta":
        return Updater.ADADELTA;
    case "nesterovs":
        return Updater.NESTEROVS;
    case "adagrad":
        return Updater.ADAGRAD;
    case "rmsprop":
        return Updater.RMSPROP;
    case "none":
        return Updater.NONE;
    case "custom":
        return Updater.CUSTOM;
    default:
        // Unknown names are reported to the caller as null rather than as an exception.
        return null;
    }
}