Example usage for org.deeplearning4j.nn.conf.distribution UniformDistribution UniformDistribution

List of usage examples for org.deeplearning4j.nn.conf.distribution UniformDistribution UniformDistribution

Introduction

In this page you can find the example usage for org.deeplearning4j.nn.conf.distribution UniformDistribution UniformDistribution.

Prototype

@JsonCreator
public UniformDistribution(@JsonProperty("lower") double lower, @JsonProperty("upper") double upper)
        throws NumberIsTooLargeException 

Source Link

Document

Create a uniform real distribution using the given lower and upper bounds.

Usage

From source file:com.javafxpert.neuralnetviz.scenario.XorExample.java

License:Apache License

/**
 * Builds and trains a small feed-forward network that learns the XOR function,
 * streaming training progress to the given WebSocket session.
 *
 * @param webSocketSession session that receives periodic score updates via {@code ModelListener}
 * @return the trained network, wrapped with feature/label names for visualization
 * @throws Exception if network construction or training fails
 */
public static MultiLayerNetworkEnhanced buildNetwork(WebSocketSession webSocketSession) throws Exception {

    // list of input values, 4 training samples with data for 2 input neurons each
    INDArray input = Nd4j.zeros(4, 2);

    // corresponding list of expected output values, 4 training samples with
    // data for 2 output neurons each (one-hot: [false, true])
    INDArray labels = Nd4j.zeros(4, 2);

    // sample 0: inputs (0, 0) -> XOR is false, so the first output neuron fires
    input.putScalar(new int[] { 0, 0 }, 0);
    input.putScalar(new int[] { 0, 1 }, 0);
    labels.putScalar(new int[] { 0, 0 }, 1);
    labels.putScalar(new int[] { 0, 1 }, 0);

    // sample 1: inputs (1, 0) -> XOR is true, so the second output neuron fires
    input.putScalar(new int[] { 1, 0 }, 1);
    input.putScalar(new int[] { 1, 1 }, 0);
    labels.putScalar(new int[] { 1, 0 }, 0);
    labels.putScalar(new int[] { 1, 1 }, 1);

    // sample 2: inputs (0, 1) -> XOR is true again
    input.putScalar(new int[] { 2, 0 }, 0);
    input.putScalar(new int[] { 2, 1 }, 1);
    labels.putScalar(new int[] { 2, 0 }, 0);
    labels.putScalar(new int[] { 2, 1 }, 1);

    // sample 3: inputs (1, 1) -> XOR is false, the first output neuron fires
    input.putScalar(new int[] { 3, 0 }, 1);
    input.putScalar(new int[] { 3, 1 }, 1);
    labels.putScalar(new int[] { 3, 0 }, 1);
    labels.putScalar(new int[] { 3, 1 }, 0);

    // create dataset object
    DataSet ds = new DataSet(input, labels);

    // Set up network configuration
    NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder();
    // training passes over the data; XOR needs something above 1000 at this
    // learning rate - values found by trial and error
    builder.iterations(10000);
    // learning rate
    builder.learningRate(0.1);
    // fixed seed for the random generator, so any run of this program brings
    // the same results - may not hold if you do something like ds.shuffle()
    builder.seed(123);
    // drop-connect is not applicable, this network is too small - but for
    // bigger networks it can help the network not just recite training data
    builder.useDropConnect(false);
    // a standard algorithm for moving on the error-plane; LINE_GRADIENT_DESCENT
    // or CONJUGATE_GRADIENT can do the job, too - it's an empirical choice
    builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
    // init the bias with 0 - empirical value, too
    builder.biasInit(0);
    // mini-batching hurts this example because the whole dataset is smaller
    // than a typical mini batch size
    builder.miniBatch(false);

    // create a multilayer network with 2 layers (including the output
    // layer, excluding the input layer)
    ListBuilder listBuilder = builder.list();

    DenseLayer.Builder hiddenLayerBuilder = new DenseLayer.Builder();
    // two input connections - simultaneously defines the number of input
    // neurons, because it's the first non-input-layer
    hiddenLayerBuilder.nIn(2);
    // number of outgoing connections, nOut simultaneously defines the
    // number of neurons in this layer
    hiddenLayerBuilder.nOut(2);
    // put the output through the sigmoid function, to cap the output
    // value between 0 and 1
    hiddenLayerBuilder.activation("sigmoid");
    // randomly initialize weights with values between 0 and 1
    hiddenLayerBuilder.weightInit(WeightInit.DISTRIBUTION);
    hiddenLayerBuilder.dist(new UniformDistribution(0, 1));

    // build and set as layer 0
    listBuilder.layer(0, hiddenLayerBuilder.build());

    // MCXENT or NEGATIVELOGLIKELIHOOD both work for this example - this
    // function calculates the error-value. For classification, multiclass
    // cross entropy is the usual choice.
    Builder outputLayerBuilder = new Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD);
    // must be the same amount as neurons in the layer before
    outputLayerBuilder.nIn(2);
    // two neurons in this layer
    outputLayerBuilder.nOut(2);
    outputLayerBuilder.activation("sigmoid");
    outputLayerBuilder.weightInit(WeightInit.DISTRIBUTION);
    outputLayerBuilder.dist(new UniformDistribution(0, 1));
    listBuilder.layer(1, outputLayerBuilder.build());

    // no pretrain phase for this network; pretrain(true) is typically used
    // for autoencoders and RBMs, to fine-tune without changing the
    // previously fine-tuned weights of earlier layers
    listBuilder.pretrain(false);
    listBuilder.backprop(true);

    // build and init the network; init() checks the configuration is correct
    MultiLayerConfiguration conf = listBuilder.build();

    String[] inputFeatureNames = { "true (1) or false (0)", "true (1) or false (0)" };
    String[] outputLabelNames = { "false", "true" };
    MultiLayerNetworkEnhanced net = new MultiLayerNetworkEnhanced(conf, inputFeatureNames, outputLabelNames);
    net.init();

    // add a listener which reports the score to the WebSocket client every
    // 100 parameter updates
    net.setListeners(new ModelListener(100, webSocketSession));

    // here the actual learning takes place
    net.fit(ds);

    // let Evaluation print stats on how often the right output neuron had
    // the highest value
    INDArray output = net.output(ds.getFeatureMatrix());
    Evaluation eval = new Evaluation(2);
    eval.eval(ds.getLabels(), output);
    System.out.println(eval.stats());

    // make a sample prediction for the input (0, 1) - XOR expects "true"
    INDArray example = Nd4j.zeros(1, 2);
    example.putScalar(new int[] { 0, 0 }, 0);
    example.putScalar(new int[] { 0, 1 }, 1);

    int[] prediction = net.predict(example);

    System.out.println("prediction for 0, 1: " + prediction[0]);

    return net;
}

From source file:org.ensor.fftmusings.autoencoder.RNNTrainer2.java

/**
 * Trains a 2-layer LSTM + MSE output network on sequences produced by a
 * previously trained stacked autoencoder ("stack.rnn"), checkpointing the
 * model to "stack-timeseries.rnn" after every epoch.
 *
 * @param args unused
 * @throws Exception if model loading, training, or serialization fails
 */
public static void main(String[] args) throws Exception {

    MultiLayerNetwork stackedAutoencoder = ModelSerializer.restoreMultiLayerNetwork("stack.rnn");

    Random rng = new Random();

    RNNIterator iter = new RNNIterator(stackedAutoencoder, rng, 100, 100, System.out);

    int labels = iter.inputColumns();
    int lstmLayerSize = 200;
    int bttLength = 50;

    // Set up network configuration.
    // NOTE(review): the original chained .iterations(1) twice; the redundant
    // second call was removed (behavior unchanged - the value was 1 both times).
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1)
            .rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list()
            .layer(0,
                    new GravesLSTM.Builder().nIn(labels).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(1,
                    new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new UniformDistribution(-0.08, 0.08)).build())
            .layer(2,
                    new RnnOutputLayer.Builder().nIn(lstmLayerSize).nOut(labels).lossFunction(LossFunction.MSE)
                            .updater(Updater.RMSPROP).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new UniformDistribution(-0.08, 0.08)).build())
            .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(bttLength).tBPTTBackwardLength(bttLength).build();

    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    model.setListeners(new ScoreIterationListener(System.out));

    // train for a fixed number of epochs, evaluating and checkpointing after each
    for (int epoch = 0; epoch < 300; epoch++) {
        model.fit(iter);
        iter.reset();
        evaluateModel(model, stackedAutoencoder, rng, epoch);
        ModelSerializer.writeModel(model, "stack-timeseries.rnn", true);
    }
}

From source file:org.ensor.fftmusings.rnn.qft.SampleLSTM.java

/**
 * Creates (or loads, if one already exists on disk) a 2-layer LSTM network
 * with a softmax classification output, and serializes the fresh network
 * to the given file before returning it.
 *
 * @param modelFilename file to load the model from / save the new model to
 * @param iter data iterator used to size the input and output layers
 * @return an initialized network ready for training
 * @throws IOException if loading or saving the model fails
 */
public static MultiLayerNetwork create(File modelFilename, DataSetIterator iter) throws IOException {

    // Reuse a previously serialized model when one already exists.
    if (modelFilename.exists()) {
        return load(modelFilename);
    }

    int nOut = iter.totalOutcomes();

    // Layer 0: LSTM taking the raw input columns.
    GravesLSTM inputLstm = new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
            .updater(Updater.RMSPROP).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
            .dist(new UniformDistribution(-0.08, 0.08)).build();

    // Layer 1: second stacked LSTM of the same width.
    GravesLSTM hiddenLstm = new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)
            .updater(Updater.RMSPROP).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
            .dist(new UniformDistribution(-0.08, 0.08)).build();

    // Layer 2: MCXENT + softmax output layer for classification.
    RnnOutputLayer classificationLayer = new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
            .activation(Activation.SOFTMAX).updater(Updater.RMSPROP).nIn(lstmLayerSize).nOut(nOut)
            .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.08, 0.08)).build();

    // Assemble the full network configuration.
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .learningRate(0.01).rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list()
            .layer(0, inputLstm).layer(1, hiddenLstm).layer(2, classificationLayer)
            .pretrain(false).backprop(true).build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.setListeners(new ScoreIterationListener());

    // Persist the freshly initialized network so later calls can load it.
    ModelSerializer.writeModel(net, modelFilename, true);

    return net;
}

From source file:org.ensor.fftmusings.rnn.RNNFactory.java

/**
 * Creates (or restores, if the given file exists) a 2-layer LSTM network
 * with a softmax classification output for character-level modelling, and
 * serializes a freshly built network to the given file before returning it.
 *
 * @param modelFilename file to restore the model from / save the new model to
 * @param iter character iterator used to size the input and output layers
 * @return an initialized network with a score listener attached
 * @throws IOException if restoring or saving the model fails
 */
public static MultiLayerNetwork create(File modelFilename, CharacterIterator iter) throws IOException {

    // Restore and reuse an existing serialized model if present.
    if (modelFilename.exists()) {
        MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(modelFilename);
        restored.clear();
        restored.setListeners(new ScoreIterationListener(System.out));
        return restored;
    }

    int outputCount = iter.totalOutcomes();

    // Two stacked LSTM layers feeding a softmax output layer; all weights
    // are drawn uniformly from [-0.08, 0.08].
    GravesLSTM lstm0 = new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
            .updater(Updater.RMSPROP).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
            .dist(new UniformDistribution(-0.08, 0.08)).build();
    GravesLSTM lstm1 = new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)
            .updater(Updater.RMSPROP).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
            .dist(new UniformDistribution(-0.08, 0.08)).build();
    RnnOutputLayer softmaxOut = new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
            .activation(Activation.SOFTMAX) // MCXENT + softmax for classification
            .updater(Updater.RMSPROP).nIn(lstmLayerSize).nOut(outputCount)
            .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.08, 0.08)).build();

    MultiLayerConfiguration configuration = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1)
            .rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list()
            .layer(0, lstm0).layer(1, lstm1).layer(2, softmaxOut)
            .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT).build();

    MultiLayerNetwork network = new MultiLayerNetwork(configuration);
    network.init();
    network.setListeners(new ScoreIterationListener(System.out));

    // Write the newly created model to disk so the next call can restore it.
    ModelSerializer.writeModel(network, modelFilename, true);

    return network;
}

From source file:org.knime.ext.dl4j.base.mln.MultiLayerNetFactory.java

License:Open Source License

/**
 * Creates a {@link org.deeplearning4j.nn.conf.NeuralNetConfiguration.ListBuilder} using the specified list of
 * layers and parameter values from this factory's members. Make sure the <code>init()</code> method was called
 * before calling this method if values other than the defaults should be used for the network parameters.
 *
 * @param layers the layers which should be used to create the network
 * @return builder with set layers and parameters
 */
protected NeuralNetConfiguration.ListBuilder createListBuilderWithLearnerParameters(final List<Layer> layers) {

    final NeuralNetConfiguration.Builder nnConfigBuilder = new NeuralNetConfiguration.Builder();

    /*
     * Need to overwrite global parameters for each layer separately as setting the parameter
     * in the NeuralNetConfiguration does not overwrite it when it was already set in the
     * specific layer. Hence, need to clone layers to not alter original layers.
     */
    final List<Layer> layersCopy = DLModelPortObjectUtils.cloneLayers(layers);
    if (m_useGlobalDropOut) {
        overwriteDropOut(layersCopy, m_dropOut);
        nnConfigBuilder.dropOut(m_dropOut);
    }
    if (m_useGlobalWeightInit) {
        overwriteWeightInit(layersCopy, m_weightInit);
        nnConfigBuilder.weightInit(m_weightInit);

        // a concrete distribution is only configured when the weight init
        // strategy requires one (DISTRIBUTION)
        if (m_weightInit.equals(ParameterUtils.DISTRIBUTION_PARAMETER_CONDITION)) {
            Distribution dist;
            switch (m_distribution) {
            case BINOMIAL:
                dist = new BinomialDistribution(m_distributionBinomialTrails,
                        m_distributionBinomialProbability);
                break;
            case NORMAL:
                dist = new NormalDistribution(m_distributionMean, m_distributionSTD);
                break;
            case UNIFORM:
                dist = new UniformDistribution(m_distributionLowerBound, m_distributionUpperBound);
                break;
            default:
                // fail fast on an unmapped enum constant rather than building a broken config
                throw new IllegalArgumentException("No case defined for DL4JDistribution: " + m_distribution);
            }
            nnConfigBuilder.dist(dist);
        }
    }
    if (m_useGlobalLearningRate) {
        overwriteLearningRate(layersCopy, m_learningRate);
    }

    //setup number of input and output neurons
    ConfigurationUtils.setupLayers(layersCopy, m_nIn);

    if (m_useSeed) {
        nnConfigBuilder.seed(m_seed);
    }
    if (m_useGradientNormalization) {
        nnConfigBuilder.gradientNormalization(m_gradientNormalization);
        nnConfigBuilder.gradientNormalizationThreshold(m_gradientNormalizationThreshold);
    }
    if (m_useRegularization) {
        nnConfigBuilder.regularization(true);
        nnConfigBuilder.l1(m_l1);
        nnConfigBuilder.l2(m_l2);
    }

    // momentum moved to updaters, will only be used with NESTEROVS
    // if (m_useMomentum) {
    // nnConfigBuilder.momentum(m_momentum);
    // nnConfigBuilder.momentumAfter(m_momentumAfter);
    // }

    if (m_useDropConnect) {
        nnConfigBuilder.useDropConnect(true);
    }

    // the learning rate policy behaves strange, need to revise conditions if we want to add it
    //        if (m_useAdvancedLearningRate) {
    //            nnConfigBuilder.learningRateDecayPolicy(m_lrPolicy);
    //            nnConfigBuilder.lrPolicyDecayRate(m_lrPolicyDecayRate);
    //            if (m_lrPolicy.equals(ParameterUtils.LR_POWER_PARAMETER_CONDITION)) {
    //                nnConfigBuilder.lrPolicyPower(m_lrPolicyPower);
    //            } else if (m_lrPolicy.equals(ParameterUtils.LR_SCHEDULE_PARAMETER_CONDITION)) {
    //                nnConfigBuilder.learningRateSchedule(m_learningRateAfter);
    //            } else if (m_lrPolicy.equals(ParameterUtils.LR_SCORE_BASED_PARAMETER_CONDITION)) {
    //                nnConfigBuilder.learningRateScoreBasedDecayRate(m_lrPolicyScoreDecayRate);
    //            } else if (m_lrPolicy.equals(ParameterUtils.LR_STEPS_PARAMETER_CONDITION)) {
    //                nnConfigBuilder.lrPolicySteps(m_lrPolicySteps);
    //            } else if (m_lrPolicy.equals(ParameterUtils.LR_EXPONENTIAL_PARAMETER_CONDITION)) {
    //                //no extra param
    //            }
    //        }

    if (m_useBiasInit) {
        nnConfigBuilder.biasInit(m_biasInit);
    }
    if (m_useBiasLearningRate) {
        overwriteBiasLearningRate(layersCopy, m_biasLearningRate);
    }

    // iteration count plus updater-specific hyperparameters; only the
    // parameters matching the selected updater are applied
    nnConfigBuilder.iterations(m_iterations);
    nnConfigBuilder.updater(m_updater);
    if (m_updater.equals(ParameterUtils.ADAM_PARAMETER_CONDITION)) {
        nnConfigBuilder.adamMeanDecay(m_adamMeanDecay);
        nnConfigBuilder.adamVarDecay(m_adamVarDecay);
    } else if (m_updater.equals(ParameterUtils.ADADELTA_PARAMETER_CONDITION)) {
        nnConfigBuilder.rho(m_adadeltaRho);
    } else if (m_updater.equals(ParameterUtils.RMSPROP_PARAMETER_CONDITION)) {
        nnConfigBuilder.rmsDecay(m_rmsDecay);
    } else if (m_updater.equals(ParameterUtils.NESTEROVS_PARAMETER_CONDITION)) {
        nnConfigBuilder.momentum(m_momentum);
        nnConfigBuilder.momentumAfter(m_momentumAfter);
    }
    //The new dialogs have a use updater checkbox which is not present in the old dialogs and is true by default. Therefore,
    //we overwrite the updater with the default values, if the checkbox is not checked. For the old dialog this will not happen
    //as the value is true by default.
    if (!m_useUpdater) {
        nnConfigBuilder.updater(Updater.valueOf(LearnerParameter.DEFAULT_UPDATER));
        nnConfigBuilder.momentum(LearnerParameter.DEFAULT_MOMENTUM);
    }

    nnConfigBuilder.optimizationAlgo(m_optimization);
    if (ParameterUtils.MAX_LINE_SEARCH_ITERATIONS_CONDITION.contains(m_optimization)) {
        nnConfigBuilder.maxNumLineSearchIterations(m_maxNumLineSearchIterations);
    }

    //very strange dl4j behaviour, for unsupervised layers (RBM, Autoencoder) we need
    //to modify the step function elswise learning does not work (error does not decrease)
    //nnConfigBuilder.stepFunction(new DefaultStepFunction());

    // attach the (possibly overwritten) layer copies in order, then the
    // pretrain/backprop flags
    final NeuralNetConfiguration.ListBuilder listBuilder = nnConfigBuilder.list();

    int currentLayerIndex = 0;
    for (final Layer layer : layersCopy) {
        listBuilder.layer(currentLayerIndex, layer);
        currentLayerIndex++;
    }
    listBuilder.pretrain(m_usePretrain);
    listBuilder.backprop(m_useBackprop);

    return listBuilder;
}

From source file:org.knime.ext.dl4j.base.mln.MultiLayerNetFactory2.java

License:Open Source License

/**
 * Creates a {@link org.deeplearning4j.nn.conf.NeuralNetConfiguration.ListBuilder} using the specified list of
 * layers and the given learner parameters.
 *
 * @param layers the layers which should be used to create the network
 * @param lp the learner parameter settings model all hyperparameters are read from
 * @return builder with set layers and parameters
 */
protected NeuralNetConfiguration.ListBuilder createListBuilderWithLearnerParameters(final List<Layer> layers,
        final LearnerParameterSettingsModels2 lp) {

    //boolean
    boolean m_useGlobalDropOut = lp.getBoolean(LearnerParameter.USE_GLOBAL_DROP_OUT,
            LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useGlobalWeightInit = lp.getBoolean(LearnerParameter.USE_GLOBAL_WEIGHT_INIT,
            LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useGlobalLearningRate = lp.getBoolean(LearnerParameter.USE_GLOBAL_LEARNING_RATE,
            LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useDropConnect = lp.getBoolean(LearnerParameter.USE_DROP_CONNECT,
            LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useBackprop = lp.getBoolean(LearnerParameter.USE_BACKPROP, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useSeed = lp.getBoolean(LearnerParameter.USE_SEED, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useGradientNormalization = lp.getBoolean(LearnerParameter.USE_GRADIENT_NORMALIZATION,
            LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useRegularization = lp.getBoolean(LearnerParameter.USE_REGULARIZATION,
            LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useBiasInit = lp.getBoolean(LearnerParameter.USE_BIAS_INIT, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useBiasLearningRate = lp.getBoolean(LearnerParameter.USE_BIAS_LEARNING_RATE,
            LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useUpdater = lp.getBoolean(LearnerParameter.USE_UPDATER, LearnerParameter.DEFAULT_USE_UPDATER);
    boolean m_usePretrain = lp.getBoolean(LearnerParameter.USE_PRETRAIN, LearnerParameter.DEFAULT_BOOLEAN);

    //double
    double m_dropOut = lp.getDouble(LearnerParameter.GLOBAL_DROP_OUT, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionBinomialProbability = lp.getDouble(LearnerParameter.DISTRIBUTION_BINOMIAL_PROBABILITY,
            LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionMean = lp.getDouble(LearnerParameter.DISTRIBUTION_MEAN,
            LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionSTD = lp.getDouble(LearnerParameter.DISTRIBUTION_STD, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionLowerBound = lp.getDouble(LearnerParameter.DISTRIBUTION_LOWER_BOUND,
            LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionUpperBound = lp.getDouble(LearnerParameter.DISTRIBUTION_UPPER_BOUND,
            LearnerParameter.DEFAULT_DOUBLE);
    double m_learningRate = lp.getDouble(LearnerParameter.GLOBAL_LEARNING_RATE,
            LearnerParameter.DEFAULT_LEARNING_RATE);
    double m_gradientNormalizationThreshold = lp.getDouble(LearnerParameter.GRADIENT_NORMALIZATION_THRESHOLD,
            LearnerParameter.DEFAULT_GRADIENT_NORMALIZATION_THRESHOLD);
    double m_l1 = lp.getDouble(LearnerParameter.L1, LearnerParameter.DEFAULT_DOUBLE);
    double m_l2 = lp.getDouble(LearnerParameter.L2, LearnerParameter.DEFAULT_DOUBLE);
    double m_biasInit = lp.getDouble(LearnerParameter.BIAS_INIT, LearnerParameter.DEFAULT_DOUBLE);
    double m_biasLearningRate = lp.getDouble(LearnerParameter.BIAS_LEARNING_RATE,
            LearnerParameter.DEFAULT_DOUBLE);
    double m_adamMeanDecay = lp.getDouble(LearnerParameter.ADAM_MEAN_DECAY,
            LearnerParameter.DEFAULT_ADAM_MEAN_DECAY);
    double m_adamVarDecay = lp.getDouble(LearnerParameter.ADAM_VAR_DECAY,
            LearnerParameter.DEFAULT_ADAM_VAR_DECAY);
    double m_adadeltaRho = lp.getDouble(LearnerParameter.ADADELTA_RHO, LearnerParameter.DEFAULT_ADADELTA_RHO);
    double m_rmsDecay = lp.getDouble(LearnerParameter.RMS_DECAY, LearnerParameter.DEFAULT_RMS_DECAY);
    double m_momentum = lp.getDouble(LearnerParameter.MOMENTUM, LearnerParameter.DEFAULT_MOMENTUM);

    //int
    Integer m_distributionBinomialTrails = lp.getInteger(LearnerParameter.DISTRIBUTION_BINOMIAL_TRAILS,
            LearnerParameter.DEFAULT_INT);
    Integer m_seed = lp.getInteger(LearnerParameter.SEED, LearnerParameter.DEFAULT_INT);
    Integer m_iterations = lp.getInteger(LearnerParameter.TRAINING_ITERATIONS, LearnerParameter.DEFAULT_INT);
    Integer m_maxNumLineSearchIterations = lp.getInteger(LearnerParameter.MAX_NUMBER_LINE_SEARCH_ITERATIONS,
            LearnerParameter.DEFAULT_INT);

    //string - enum-valued parameters are stored as strings and parsed here
    WeightInit m_weightInit = WeightInit
            .valueOf(lp.getString(LearnerParameter.GLOBAL_WEIGHT_INIT, LearnerParameter.DEFAULT_WEIGHT_INIT));
    DL4JDistribution m_distribution = DL4JDistribution
            .valueOf(lp.getString(LearnerParameter.DISTRIBUTION, LearnerParameter.DEFAULT_DISTRIBUTION));
    GradientNormalization m_gradientNormalization = DL4JGradientNormalization
            .fromToString(lp.getString(LearnerParameter.GRADIENT_NORMALIZATION,
                    LearnerParameter.DEFAULT_GRADIENT_NORMALIZATION))
            .getDL4JValue();
    Updater m_updater = Updater
            .valueOf(lp.getString(LearnerParameter.UPDATER, LearnerParameter.DEFAULT_UPDATER));
    Map<Integer, Double> m_momentumAfter = ParameterUtils
            .convertStringToMap(lp.getString(LearnerParameter.MOMENTUM_AFTER, LearnerParameter.DEFAULT_MAP));
    OptimizationAlgorithm m_optimization = DL4JOptimizationAlgorithm.fromToString(
            lp.getString(LearnerParameter.OPTIMIZATION_ALGORITHM, LearnerParameter.DEFAULT_OPTIMIZATION))
            .getDL4JValue();

    final NeuralNetConfiguration.Builder nnConfigBuilder = new NeuralNetConfiguration.Builder();

    /*
     * Since 0.9.1: Use DL4J workspaces which makes training faster and we can reduce the number of GC calls of DL4J.
     * For now use WorkspaceMode.SEPARATE for both training and inference.
     */
    nnConfigBuilder.inferenceWorkspaceMode(WorkspaceMode.SEPARATE);
    nnConfigBuilder.trainingWorkspaceMode(WorkspaceMode.SEPARATE);

    /*
     * Need to overwrite global parameters for each layer separately as setting the parameter
     * in the NeuralNetConfiguration does not overwrite it when it was already set in the
     * specific layer. Hence, need to clone layers to not alter original layers.
     */
    final List<Layer> layersCopy = DLModelPortObjectUtils.cloneLayers(layers);
    if (m_useGlobalDropOut) {
        overwriteDropOut(layersCopy, m_dropOut);
        nnConfigBuilder.dropOut(m_dropOut);
    }
    if (m_useGlobalWeightInit) {
        overwriteWeightInit(layersCopy, m_weightInit);
        nnConfigBuilder.weightInit(m_weightInit);

        // a concrete distribution is only configured when the weight init
        // strategy requires one (DISTRIBUTION)
        if (m_weightInit.equals(ParameterUtils.DISTRIBUTION_PARAMETER_CONDITION)) {
            Distribution dist;
            switch (m_distribution) {
            case BINOMIAL:
                dist = new BinomialDistribution(m_distributionBinomialTrails,
                        m_distributionBinomialProbability);
                break;
            case NORMAL:
                dist = new NormalDistribution(m_distributionMean, m_distributionSTD);
                break;
            case UNIFORM:
                dist = new UniformDistribution(m_distributionLowerBound, m_distributionUpperBound);
                break;
            default:
                // fail fast on an unmapped enum constant rather than building a broken config
                throw new IllegalArgumentException("No case defined for DL4JDistribution: " + m_distribution);
            }
            nnConfigBuilder.dist(dist);
        }
    }
    if (m_useGlobalLearningRate) {
        overwriteLearningRate(layersCopy, m_learningRate);
    }

    //setup number of input and output neurons
    //ConfigurationUtils.setupLayers(layersCopy, m_nIn);
    //substituted with 'setInputType(InputType.feedForward(m_nIn)'

    if (m_useSeed) {
        nnConfigBuilder.seed(m_seed);
    }
    if (m_useGradientNormalization) {
        nnConfigBuilder.gradientNormalization(m_gradientNormalization);
        nnConfigBuilder.gradientNormalizationThreshold(m_gradientNormalizationThreshold);
    }
    if (m_useRegularization) {
        nnConfigBuilder.regularization(true);
        nnConfigBuilder.l1(m_l1);
        nnConfigBuilder.l2(m_l2);
    }

    // momentum moved to updaters, will only be used with NESTEROVS
    // if (m_useMomentum) {
    // nnConfigBuilder.momentum(m_momentum);
    // nnConfigBuilder.momentumAfter(m_momentumAfter);
    // }

    if (m_useDropConnect) {
        nnConfigBuilder.useDropConnect(true);
    }

    // the learning rate policy behaves strange, need to revise conditions if we want to add it
    //        if (m_useAdvancedLearningRate) {
    //            nnConfigBuilder.learningRateDecayPolicy(m_lrPolicy);
    //            nnConfigBuilder.lrPolicyDecayRate(m_lrPolicyDecayRate);
    //            if (m_lrPolicy.equals(ParameterUtils.LR_POWER_PARAMETER_CONDITION)) {
    //                nnConfigBuilder.lrPolicyPower(m_lrPolicyPower);
    //            } else if (m_lrPolicy.equals(ParameterUtils.LR_SCHEDULE_PARAMETER_CONDITION)) {
    //                nnConfigBuilder.learningRateSchedule(m_learningRateAfter);
    //            } else if (m_lrPolicy.equals(ParameterUtils.LR_SCORE_BASED_PARAMETER_CONDITION)) {
    //                nnConfigBuilder.learningRateScoreBasedDecayRate(m_lrPolicyScoreDecayRate);
    //            } else if (m_lrPolicy.equals(ParameterUtils.LR_STEPS_PARAMETER_CONDITION)) {
    //                nnConfigBuilder.lrPolicySteps(m_lrPolicySteps);
    //            } else if (m_lrPolicy.equals(ParameterUtils.LR_EXPONENTIAL_PARAMETER_CONDITION)) {
    //                //no extra param
    //            }
    //        }

    if (m_useBiasInit) {
        nnConfigBuilder.biasInit(m_biasInit);
    }
    if (m_useBiasLearningRate) {
        overwriteBiasLearningRate(layersCopy, m_biasLearningRate);
    }

    // iteration count plus updater-specific hyperparameters; only the
    // parameters matching the selected updater are applied
    nnConfigBuilder.iterations(m_iterations);
    nnConfigBuilder.updater(m_updater);
    if (m_updater.equals(ParameterUtils.ADAM_PARAMETER_CONDITION)) {
        nnConfigBuilder.adamMeanDecay(m_adamMeanDecay);
        nnConfigBuilder.adamVarDecay(m_adamVarDecay);
    } else if (m_updater.equals(ParameterUtils.ADADELTA_PARAMETER_CONDITION)) {
        nnConfigBuilder.rho(m_adadeltaRho);
    } else if (m_updater.equals(ParameterUtils.RMSPROP_PARAMETER_CONDITION)) {
        nnConfigBuilder.rmsDecay(m_rmsDecay);
    } else if (m_updater.equals(ParameterUtils.NESTEROVS_PARAMETER_CONDITION)) {
        nnConfigBuilder.momentum(m_momentum);
        nnConfigBuilder.momentumAfter(m_momentumAfter);
    }
    //The new dialogs have a use updater checkbox which is not present in the old dialogs and is true by default. Therefore,
    //we overwrite the updater with the default values, if the checkbox is not checked. For the old dialog this will not happen
    //as the value is true by default.
    if (!m_useUpdater) {
        nnConfigBuilder.updater(Updater.valueOf(LearnerParameter.DEFAULT_UPDATER));
        nnConfigBuilder.momentum(LearnerParameter.DEFAULT_MOMENTUM);
    }

    nnConfigBuilder.optimizationAlgo(m_optimization);
    if (ParameterUtils.MAX_LINE_SEARCH_ITERATIONS_CONDITION.contains(m_optimization)) {
        nnConfigBuilder.maxNumLineSearchIterations(m_maxNumLineSearchIterations);
    }

    //very strange dl4j behaviour, for unsupervised layers (RBM, Autoencoder) we need
    //to modify the step function elswise learning does not work (error does not decrease)
    //nnConfigBuilder.stepFunction(new DefaultStepFunction());

    // attach the (possibly overwritten) layer copies in order, then the
    // pretrain/backprop flags
    final NeuralNetConfiguration.ListBuilder listBuilder = nnConfigBuilder.list();

    int currentLayerIndex = 0;
    for (final Layer layer : layersCopy) {
        listBuilder.layer(currentLayerIndex, layer);
        currentLayerIndex++;
    }
    listBuilder.pretrain(m_usePretrain);
    listBuilder.backprop(m_useBackprop);

    //infer correct number of inputs and outputs for each layer by using the number of inputs of the first layer
    //and the number of outputs for the following ones
    //NOTE(review): m_nIn is not declared in this method - presumably a member field
    //holding the network's input count; confirm at class level
    listBuilder.setInputType(InputType.feedForward(m_nIn));

    return listBuilder;
}