List of usage examples for org.deeplearning4j.nn.conf.distribution UniformDistribution UniformDistribution
@JsonCreator public UniformDistribution(@JsonProperty("lower") double lower, @JsonProperty("upper") double upper) throws NumberIsTooLargeException
From source file:com.javafxpert.neuralnetviz.scenario.XorExample.java
License:Apache License
public static MultiLayerNetworkEnhanced buildNetwork(WebSocketSession webSocketSession) throws Exception { //public static void main(String[] args) throws Exception { //System.out.println("In XorExample.go()"); // list off input values, 4 training samples with data for 2 // input-neurons each INDArray input = Nd4j.zeros(4, 2);/*w w w. j a va 2 s. co m*/ //System.out.println("After INDArray input: " + input); // correspondending list with expected output values, 4 training samples // with data for 2 output-neurons each INDArray labels = Nd4j.zeros(4, 2); // create first dataset // when first input=0 and second input=0 input.putScalar(new int[] { 0, 0 }, 0); input.putScalar(new int[] { 0, 1 }, 0); // then the first output fires for false, and the second is 0 (see class // comment) labels.putScalar(new int[] { 0, 0 }, 1); labels.putScalar(new int[] { 0, 1 }, 0); // when first input=1 and second input=0 input.putScalar(new int[] { 1, 0 }, 1); input.putScalar(new int[] { 1, 1 }, 0); // then xor is true, therefore the second output neuron fires labels.putScalar(new int[] { 1, 0 }, 0); labels.putScalar(new int[] { 1, 1 }, 1); // same as above input.putScalar(new int[] { 2, 0 }, 0); input.putScalar(new int[] { 2, 1 }, 1); labels.putScalar(new int[] { 2, 0 }, 0); labels.putScalar(new int[] { 2, 1 }, 1); // when both inputs fire, xor is false again - the first output should // fire input.putScalar(new int[] { 3, 0 }, 1); input.putScalar(new int[] { 3, 1 }, 1); labels.putScalar(new int[] { 3, 0 }, 1); labels.putScalar(new int[] { 3, 1 }, 0); //System.out.println("Before DataSet ds"); // create dataset object DataSet ds = new DataSet(input, labels); //System.out.println("After DataSet ds: " + ds); // Set up network configuration NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder(); // how often should the training set be run, we need something above // 1000, or a higher learning-rate - found this values just by trial and // error 
builder.iterations(10000); // learning rate builder.learningRate(0.1); // fixed seed for the random generator, so any run of this program // brings the same results - may not work if you do something like // ds.shuffle() builder.seed(123); // not applicable, this network is to small - but for bigger networks it // can help that the network will not only recite the training data builder.useDropConnect(false); // a standard algorithm for moving on the error-plane, this one works // best for me, LINE_GRADIENT_DESCENT or CONJUGATE_GRADIENT can do the // job, too - it's an empirical value which one matches best to // your problem builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); // init the bias with 0 - empirical value, too builder.biasInit(0); // from "http://deeplearning4j.org/architecture": The networks can // process the input more quickly and more accurately by ingesting // minibatches 5-10 elements at a time in parallel. // this example runs better without, because the dataset is smaller than // the mini batch size builder.miniBatch(false); // create a multilayer network with 2 layers (including the output // layer, excluding the input payer) ListBuilder listBuilder = builder.list(); DenseLayer.Builder hiddenLayerBuilder = new DenseLayer.Builder(); // two input connections - simultaneously defines the number of input // neurons, because it's the first non-input-layer hiddenLayerBuilder.nIn(2); // number of outgooing connections, nOut simultaneously defines the // number of neurons in this layer hiddenLayerBuilder.nOut(2); // put the output through the sigmoid function, to cap the output // valuebetween 0 and 1 hiddenLayerBuilder.activation("sigmoid"); // random initialize weights with values between 0 and 1 hiddenLayerBuilder.weightInit(WeightInit.DISTRIBUTION); hiddenLayerBuilder.dist(new UniformDistribution(0, 1)); // build and set as layer 0 listBuilder.layer(0, hiddenLayerBuilder.build()); // MCXENT or NEGATIVELOGLIKELIHOOD work ok 
for this example - this // function calculates the error-value // From homepage: Your net's purpose will determine the loss funtion you // use. For pretraining, choose reconstruction entropy. For // classification, use multiclass cross entropy. Builder outputLayerBuilder = new Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD); // must be the same amout as neurons in the layer before outputLayerBuilder.nIn(2); // two neurons in this layer outputLayerBuilder.nOut(2); outputLayerBuilder.activation("sigmoid"); outputLayerBuilder.weightInit(WeightInit.DISTRIBUTION); outputLayerBuilder.dist(new UniformDistribution(0, 1)); listBuilder.layer(1, outputLayerBuilder.build()); // no pretrain phase for this network listBuilder.pretrain(false); // seems to be mandatory // according to agibsonccc: You typically only use that with // pretrain(true) when you want to do pretrain/finetune without changing // the previous layers finetuned weights that's for autoencoders and // rbms listBuilder.backprop(true); // build and init the network, will check if everything is configured // correct MultiLayerConfiguration conf = listBuilder.build(); String[] inputFeatureNames = { "true (1) or false (0)", "true (1) or false (0)" }; String[] outputLabelNames = { "false", "true" }; MultiLayerNetworkEnhanced net = new MultiLayerNetworkEnhanced(conf, inputFeatureNames, outputLabelNames); net.init(); // add an listener which outputs the error every 100 parameter updates //net.setListeners(new ScoreIterationListener(100)); net.setListeners(new ModelListener(100, webSocketSession)); // C&P from GravesLSTMCharModellingExample // Print the number of parameters in the network (and for each layer) Layer[] layers = net.getLayers(); int totalNumParams = 0; for (int i = 0; i < layers.length; i++) { int nParams = layers[i].numParams(); //System.out.println("Number of parameters in layer " + i + ": " + nParams); totalNumParams += nParams; } //System.out.println("Total number of network parameters: " + 
totalNumParams); // here the actual learning takes place net.fit(ds); // create output for every training sample INDArray output = net.output(ds.getFeatureMatrix()); //System.out.println("output: " + output); for (int i = 0; i < output.rows(); i++) { String actual = ds.getLabels().getRow(i).toString().trim(); String predicted = output.getRow(i).toString().trim(); //System.out.println("actual " + actual + " vs predicted " + predicted); } // let Evaluation prints stats how often the right output had the // highest value Evaluation eval = new Evaluation(2); eval.eval(ds.getLabels(), output); System.out.println(eval.stats()); //displayNetwork(net); // Make prediction INDArray example = Nd4j.zeros(1, 2); // create first dataset // when first input=0 and second input=0 example.putScalar(new int[] { 0, 0 }, 0); example.putScalar(new int[] { 0, 1 }, 1); int[] prediction = net.predict(example); System.out.println("prediction for 0, 1: " + prediction[0]); return net; }
From source file:org.ensor.fftmusings.autoencoder.RNNTrainer2.java
public static void main(String[] args) throws Exception { MultiLayerNetwork stackedAutoencoder = ModelSerializer.restoreMultiLayerNetwork("stack.rnn"); Random rng = new Random(); RNNIterator iter = new RNNIterator(stackedAutoencoder, rng, 100, 100, System.out); int labels = iter.inputColumns(); int lstmLayerSize = 200; int bttLength = 50; //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1) .rmsDecay(0.95).seed(12345).iterations(1).regularization(true).l2(0.001).list() .layer(0,/* w ww . j a va 2 s . com*/ new GravesLSTM.Builder().nIn(labels).nOut(lstmLayerSize).updater(Updater.RMSPROP) .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP) .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .layer(2, new RnnOutputLayer.Builder().nIn(lstmLayerSize).nOut(labels).lossFunction(LossFunction.MSE) .updater(Updater.RMSPROP).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT) .tBPTTForwardLength(bttLength).tBPTTBackwardLength(bttLength).build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); model.setListeners(new ScoreIterationListener(System.out)); for (int epoch = 0; epoch < 300; epoch++) { model.fit(iter); iter.reset(); evaluateModel(model, stackedAutoencoder, rng, epoch); ModelSerializer.writeModel(model, "stack-timeseries.rnn", true); } }
From source file:org.ensor.fftmusings.rnn.qft.SampleLSTM.java
public static MultiLayerNetwork create(File modelFilename, DataSetIterator iter) throws IOException { if (modelFilename.exists()) { return load(modelFilename); }//from w w w .j a va 2s. co m int nOut = iter.totalOutcomes(); //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) .learningRate(0.01).rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list() .layer(0, new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize) .updater(Updater.RMSPROP).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP) .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .updater(Updater.RMSPROP).nIn(lstmLayerSize).nOut(nOut) .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.08, 0.08)) .build()) .pretrain(false).backprop(true).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); net.setListeners(new ScoreIterationListener()); ModelSerializer.writeModel(net, modelFilename, true); return net; }
From source file:org.ensor.fftmusings.rnn.RNNFactory.java
public static MultiLayerNetwork create(File modelFilename, CharacterIterator iter) throws IOException { if (modelFilename.exists()) { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(modelFilename); net.clear();/*from ww w . j a va 2 s . c o m*/ net.setListeners(new ScoreIterationListener(System.out)); return net; } int nOut = iter.totalOutcomes(); //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.1) .rmsDecay(0.95).seed(12345).regularization(true).l2(0.001).list() .layer(0, new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize) .updater(Updater.RMSPROP).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).updater(Updater.RMSPROP) .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION) .dist(new UniformDistribution(-0.08, 0.08)).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .updater(Updater.RMSPROP).nIn(lstmLayerSize).nOut(nOut) .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.08, 0.08)) .build()) .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); net.setListeners(new ScoreIterationListener(System.out)); ModelSerializer.writeModel(net, modelFilename, true); return net; }
From source file:org.knime.ext.dl4j.base.mln.MultiLayerNetFactory.java
License:Open Source License
/**
 * Creates a {@link org.deeplearning4j.nn.conf.NeuralNetConfiguration.ListBuilder} using the
 * specified list of layers and parameter values from members. Make sure the <code>init()</code>
 * method was called before calling this method if other values than the defaults for the
 * network parameters should be used.
 *
 * @param layers the layers which should be used to create the network
 * @return builder with set layers and parameters
 */
protected NeuralNetConfiguration.ListBuilder createListBuilderWithLearnerParameters(final List<Layer> layers) {

    final NeuralNetConfiguration.Builder nnConfigBuilder = new NeuralNetConfiguration.Builder();

    /*
     * Need to overwrite global parameters for each layer separately, as setting the parameter
     * in the NeuralNetConfiguration does not overwrite it when it was already set in the
     * specific layer. Hence, the layers are cloned so the originals are not altered.
     */
    final List<Layer> layersCopy = DLModelPortObjectUtils.cloneLayers(layers);
    if (m_useGlobalDropOut) {
        overwriteDropOut(layersCopy, m_dropOut);
        nnConfigBuilder.dropOut(m_dropOut);
    }
    if (m_useGlobalWeightInit) {
        overwriteWeightInit(layersCopy, m_weightInit);
        nnConfigBuilder.weightInit(m_weightInit);
        // A distribution is only needed when the weight init strategy samples from one.
        if (m_weightInit.equals(ParameterUtils.DISTRIBUTION_PARAMETER_CONDITION)) {
            Distribution dist;
            switch (m_distribution) {
                case BINOMIAL:
                    dist = new BinomialDistribution(m_distributionBinomialTrails,
                        m_distributionBinomialProbability);
                    break;
                case NORMAL:
                    dist = new NormalDistribution(m_distributionMean, m_distributionSTD);
                    break;
                case UNIFORM:
                    dist = new UniformDistribution(m_distributionLowerBound, m_distributionUpperBound);
                    break;
                default:
                    throw new IllegalArgumentException(
                        "No case defined for DL4JDistribution: " + m_distribution);
            }
            nnConfigBuilder.dist(dist);
        }
    }
    if (m_useGlobalLearningRate) {
        overwriteLearningRate(layersCopy, m_learningRate);
    }

    // setup number of input and output neurons
    ConfigurationUtils.setupLayers(layersCopy, m_nIn);

    if (m_useSeed) {
        nnConfigBuilder.seed(m_seed);
    }
    if (m_useGradientNormalization) {
        nnConfigBuilder.gradientNormalization(m_gradientNormalization);
        nnConfigBuilder.gradientNormalizationThreshold(m_gradientNormalizationThreshold);
    }
    if (m_useRegularization) {
        nnConfigBuilder.regularization(true);
        nnConfigBuilder.l1(m_l1);
        nnConfigBuilder.l2(m_l2);
    }

    // NOTE(review): momentum moved to the updaters; it is only applied with NESTEROVS below.
    // The former global momentum configuration was removed here.

    if (m_useDropConnect) {
        nnConfigBuilder.useDropConnect(true);
    }

    // NOTE(review): the learning rate policy behaved strangely; its configuration
    // (learningRateDecayPolicy / lrPolicy* parameters) was removed pending revised conditions.

    if (m_useBiasInit) {
        nnConfigBuilder.biasInit(m_biasInit);
    }
    if (m_useBiasLearningRate) {
        overwriteBiasLearningRate(layersCopy, m_biasLearningRate);
    }
    nnConfigBuilder.iterations(m_iterations);

    // Updater-specific hyperparameters: only the ones matching the selected updater apply.
    nnConfigBuilder.updater(m_updater);
    if (m_updater.equals(ParameterUtils.ADAM_PARAMETER_CONDITION)) {
        nnConfigBuilder.adamMeanDecay(m_adamMeanDecay);
        nnConfigBuilder.adamVarDecay(m_adamVarDecay);
    } else if (m_updater.equals(ParameterUtils.ADADELTA_PARAMETER_CONDITION)) {
        nnConfigBuilder.rho(m_adadeltaRho);
    } else if (m_updater.equals(ParameterUtils.RMSPROP_PARAMETER_CONDITION)) {
        nnConfigBuilder.rmsDecay(m_rmsDecay);
    } else if (m_updater.equals(ParameterUtils.NESTEROVS_PARAMETER_CONDITION)) {
        nnConfigBuilder.momentum(m_momentum);
        nnConfigBuilder.momentumAfter(m_momentumAfter);
    }

    // The new dialogs have a "use updater" checkbox which is not present in the old dialogs and
    // is true by default. Therefore, we overwrite the updater with the default values if the
    // checkbox is not checked. For the old dialog this will not happen, as the value is true by
    // default.
    if (!m_useUpdater) {
        nnConfigBuilder.updater(Updater.valueOf(LearnerParameter.DEFAULT_UPDATER));
        nnConfigBuilder.momentum(LearnerParameter.DEFAULT_MOMENTUM);
    }

    nnConfigBuilder.optimizationAlgo(m_optimization);
    // Line search iterations only apply to optimization algorithms that perform line search.
    if (ParameterUtils.MAX_LINE_SEARCH_ITERATIONS_CONDITION.contains(m_optimization)) {
        nnConfigBuilder.maxNumLineSearchIterations(m_maxNumLineSearchIterations);
    }

    // Very strange DL4J behaviour: for unsupervised layers (RBM, Autoencoder) the step function
    // would need to be modified, otherwise learning does not work (error does not decrease).
    // nnConfigBuilder.stepFunction(new DefaultStepFunction());

    final NeuralNetConfiguration.ListBuilder listBuilder = nnConfigBuilder.list();
    int currentLayerIndex = 0;
    for (final Layer layer : layersCopy) {
        listBuilder.layer(currentLayerIndex, layer);
        currentLayerIndex++;
    }
    listBuilder.pretrain(m_usePretrain);
    listBuilder.backprop(m_useBackprop);
    return listBuilder;
}
From source file:org.knime.ext.dl4j.base.mln.MultiLayerNetFactory2.java
License:Open Source License
/**
 * Creates a {@link org.deeplearning4j.nn.conf.NeuralNetConfiguration.ListBuilder} using the
 * specified list of layers and learner parameters.
 *
 * @param layers the layers which should be used to create the network
 * @param lp the learner parameter settings the network configuration is read from
 * @return builder with set layers and parameters
 */
protected NeuralNetConfiguration.ListBuilder createListBuilderWithLearnerParameters(final List<Layer> layers,
    final LearnerParameterSettingsModels2 lp) {

    // boolean parameters
    boolean m_useGlobalDropOut =
        lp.getBoolean(LearnerParameter.USE_GLOBAL_DROP_OUT, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useGlobalWeightInit =
        lp.getBoolean(LearnerParameter.USE_GLOBAL_WEIGHT_INIT, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useGlobalLearningRate =
        lp.getBoolean(LearnerParameter.USE_GLOBAL_LEARNING_RATE, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useDropConnect =
        lp.getBoolean(LearnerParameter.USE_DROP_CONNECT, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useBackprop = lp.getBoolean(LearnerParameter.USE_BACKPROP, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useSeed = lp.getBoolean(LearnerParameter.USE_SEED, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useGradientNormalization =
        lp.getBoolean(LearnerParameter.USE_GRADIENT_NORMALIZATION, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useRegularization =
        lp.getBoolean(LearnerParameter.USE_REGULARIZATION, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useBiasInit = lp.getBoolean(LearnerParameter.USE_BIAS_INIT, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useBiasLearningRate =
        lp.getBoolean(LearnerParameter.USE_BIAS_LEARNING_RATE, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useUpdater = lp.getBoolean(LearnerParameter.USE_UPDATER, LearnerParameter.DEFAULT_USE_UPDATER);
    boolean m_usePretrain = lp.getBoolean(LearnerParameter.USE_PRETRAIN, LearnerParameter.DEFAULT_BOOLEAN);

    // double parameters
    double m_dropOut = lp.getDouble(LearnerParameter.GLOBAL_DROP_OUT, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionBinomialProbability =
        lp.getDouble(LearnerParameter.DISTRIBUTION_BINOMIAL_PROBABILITY, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionMean =
        lp.getDouble(LearnerParameter.DISTRIBUTION_MEAN, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionSTD =
        lp.getDouble(LearnerParameter.DISTRIBUTION_STD, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionLowerBound =
        lp.getDouble(LearnerParameter.DISTRIBUTION_LOWER_BOUND, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionUpperBound =
        lp.getDouble(LearnerParameter.DISTRIBUTION_UPPER_BOUND, LearnerParameter.DEFAULT_DOUBLE);
    double m_learningRate =
        lp.getDouble(LearnerParameter.GLOBAL_LEARNING_RATE, LearnerParameter.DEFAULT_LEARNING_RATE);
    double m_gradientNormalizationThreshold = lp.getDouble(LearnerParameter.GRADIENT_NORMALIZATION_THRESHOLD,
        LearnerParameter.DEFAULT_GRADIENT_NORMALIZATION_THRESHOLD);
    double m_l1 = lp.getDouble(LearnerParameter.L1, LearnerParameter.DEFAULT_DOUBLE);
    double m_l2 = lp.getDouble(LearnerParameter.L2, LearnerParameter.DEFAULT_DOUBLE);
    double m_biasInit = lp.getDouble(LearnerParameter.BIAS_INIT, LearnerParameter.DEFAULT_DOUBLE);
    double m_biasLearningRate =
        lp.getDouble(LearnerParameter.BIAS_LEARNING_RATE, LearnerParameter.DEFAULT_DOUBLE);
    double m_adamMeanDecay =
        lp.getDouble(LearnerParameter.ADAM_MEAN_DECAY, LearnerParameter.DEFAULT_ADAM_MEAN_DECAY);
    double m_adamVarDecay =
        lp.getDouble(LearnerParameter.ADAM_VAR_DECAY, LearnerParameter.DEFAULT_ADAM_VAR_DECAY);
    double m_adadeltaRho = lp.getDouble(LearnerParameter.ADADELTA_RHO, LearnerParameter.DEFAULT_ADADELTA_RHO);
    double m_rmsDecay = lp.getDouble(LearnerParameter.RMS_DECAY, LearnerParameter.DEFAULT_RMS_DECAY);
    double m_momentum = lp.getDouble(LearnerParameter.MOMENTUM, LearnerParameter.DEFAULT_MOMENTUM);

    // int parameters
    Integer m_distributionBinomialTrails =
        lp.getInteger(LearnerParameter.DISTRIBUTION_BINOMIAL_TRAILS, LearnerParameter.DEFAULT_INT);
    Integer m_seed = lp.getInteger(LearnerParameter.SEED, LearnerParameter.DEFAULT_INT);
    Integer m_iterations = lp.getInteger(LearnerParameter.TRAINING_ITERATIONS, LearnerParameter.DEFAULT_INT);
    Integer m_maxNumLineSearchIterations =
        lp.getInteger(LearnerParameter.MAX_NUMBER_LINE_SEARCH_ITERATIONS, LearnerParameter.DEFAULT_INT);

    // string parameters (converted to their enum/map representations)
    WeightInit m_weightInit = WeightInit
        .valueOf(lp.getString(LearnerParameter.GLOBAL_WEIGHT_INIT, LearnerParameter.DEFAULT_WEIGHT_INIT));
    DL4JDistribution m_distribution = DL4JDistribution
        .valueOf(lp.getString(LearnerParameter.DISTRIBUTION, LearnerParameter.DEFAULT_DISTRIBUTION));
    GradientNormalization m_gradientNormalization = DL4JGradientNormalization
        .fromToString(lp.getString(LearnerParameter.GRADIENT_NORMALIZATION,
            LearnerParameter.DEFAULT_GRADIENT_NORMALIZATION))
        .getDL4JValue();
    Updater m_updater = Updater
        .valueOf(lp.getString(LearnerParameter.UPDATER, LearnerParameter.DEFAULT_UPDATER));
    Map<Integer, Double> m_momentumAfter = ParameterUtils
        .convertStringToMap(lp.getString(LearnerParameter.MOMENTUM_AFTER, LearnerParameter.DEFAULT_MAP));
    OptimizationAlgorithm m_optimization = DL4JOptimizationAlgorithm.fromToString(
        lp.getString(LearnerParameter.OPTIMIZATION_ALGORITHM, LearnerParameter.DEFAULT_OPTIMIZATION))
        .getDL4JValue();

    final NeuralNetConfiguration.Builder nnConfigBuilder = new NeuralNetConfiguration.Builder();

    /*
     * Since 0.9.1: Use DL4J workspaces, which makes training faster and reduces the number of
     * GC calls of DL4J. For now use WorkspaceMode.SEPARATE for both training and inference.
     */
    nnConfigBuilder.inferenceWorkspaceMode(WorkspaceMode.SEPARATE);
    nnConfigBuilder.trainingWorkspaceMode(WorkspaceMode.SEPARATE);

    /*
     * Need to overwrite global parameters for each layer separately, as setting the parameter
     * in the NeuralNetConfiguration does not overwrite it when it was already set in the
     * specific layer. Hence, the layers are cloned so the originals are not altered.
     */
    final List<Layer> layersCopy = DLModelPortObjectUtils.cloneLayers(layers);
    if (m_useGlobalDropOut) {
        overwriteDropOut(layersCopy, m_dropOut);
        nnConfigBuilder.dropOut(m_dropOut);
    }
    if (m_useGlobalWeightInit) {
        overwriteWeightInit(layersCopy, m_weightInit);
        nnConfigBuilder.weightInit(m_weightInit);
        // A distribution is only needed when the weight init strategy samples from one.
        if (m_weightInit.equals(ParameterUtils.DISTRIBUTION_PARAMETER_CONDITION)) {
            Distribution dist;
            switch (m_distribution) {
                case BINOMIAL:
                    dist = new BinomialDistribution(m_distributionBinomialTrails,
                        m_distributionBinomialProbability);
                    break;
                case NORMAL:
                    dist = new NormalDistribution(m_distributionMean, m_distributionSTD);
                    break;
                case UNIFORM:
                    dist = new UniformDistribution(m_distributionLowerBound, m_distributionUpperBound);
                    break;
                default:
                    throw new IllegalArgumentException(
                        "No case defined for DL4JDistribution: " + m_distribution);
            }
            nnConfigBuilder.dist(dist);
        }
    }
    if (m_useGlobalLearningRate) {
        overwriteLearningRate(layersCopy, m_learningRate);
    }

    // setup number of input and output neurons
    // ConfigurationUtils.setupLayers(layersCopy, m_nIn);
    // substituted with 'setInputType(InputType.feedForward(m_nIn)' — see end of method

    if (m_useSeed) {
        nnConfigBuilder.seed(m_seed);
    }
    if (m_useGradientNormalization) {
        nnConfigBuilder.gradientNormalization(m_gradientNormalization);
        nnConfigBuilder.gradientNormalizationThreshold(m_gradientNormalizationThreshold);
    }
    if (m_useRegularization) {
        nnConfigBuilder.regularization(true);
        nnConfigBuilder.l1(m_l1);
        nnConfigBuilder.l2(m_l2);
    }

    // NOTE(review): momentum moved to the updaters; it is only applied with NESTEROVS below.
    // The former global momentum configuration was removed here.

    if (m_useDropConnect) {
        nnConfigBuilder.useDropConnect(true);
    }

    // NOTE(review): the learning rate policy behaved strangely; its configuration
    // (learningRateDecayPolicy / lrPolicy* parameters) was removed pending revised conditions.

    if (m_useBiasInit) {
        nnConfigBuilder.biasInit(m_biasInit);
    }
    if (m_useBiasLearningRate) {
        overwriteBiasLearningRate(layersCopy, m_biasLearningRate);
    }
    nnConfigBuilder.iterations(m_iterations);

    // Updater-specific hyperparameters: only the ones matching the selected updater apply.
    nnConfigBuilder.updater(m_updater);
    if (m_updater.equals(ParameterUtils.ADAM_PARAMETER_CONDITION)) {
        nnConfigBuilder.adamMeanDecay(m_adamMeanDecay);
        nnConfigBuilder.adamVarDecay(m_adamVarDecay);
    } else if (m_updater.equals(ParameterUtils.ADADELTA_PARAMETER_CONDITION)) {
        nnConfigBuilder.rho(m_adadeltaRho);
    } else if (m_updater.equals(ParameterUtils.RMSPROP_PARAMETER_CONDITION)) {
        nnConfigBuilder.rmsDecay(m_rmsDecay);
    } else if (m_updater.equals(ParameterUtils.NESTEROVS_PARAMETER_CONDITION)) {
        nnConfigBuilder.momentum(m_momentum);
        nnConfigBuilder.momentumAfter(m_momentumAfter);
    }

    // The new dialogs have a "use updater" checkbox which is not present in the old dialogs and
    // is true by default. Therefore, we overwrite the updater with the default values if the
    // checkbox is not checked. For the old dialog this will not happen, as the value is true by
    // default.
    if (!m_useUpdater) {
        nnConfigBuilder.updater(Updater.valueOf(LearnerParameter.DEFAULT_UPDATER));
        nnConfigBuilder.momentum(LearnerParameter.DEFAULT_MOMENTUM);
    }

    nnConfigBuilder.optimizationAlgo(m_optimization);
    // Line search iterations only apply to optimization algorithms that perform line search.
    if (ParameterUtils.MAX_LINE_SEARCH_ITERATIONS_CONDITION.contains(m_optimization)) {
        nnConfigBuilder.maxNumLineSearchIterations(m_maxNumLineSearchIterations);
    }

    // Very strange DL4J behaviour: for unsupervised layers (RBM, Autoencoder) the step function
    // would need to be modified, otherwise learning does not work (error does not decrease).
    // nnConfigBuilder.stepFunction(new DefaultStepFunction());

    final NeuralNetConfiguration.ListBuilder listBuilder = nnConfigBuilder.list();
    int currentLayerIndex = 0;
    for (final Layer layer : layersCopy) {
        listBuilder.layer(currentLayerIndex, layer);
        currentLayerIndex++;
    }
    listBuilder.pretrain(m_usePretrain);
    listBuilder.backprop(m_useBackprop);

    // Infer the correct number of inputs and outputs for each layer by using the number of
    // inputs of the first layer and the number of outputs for the following ones.
    listBuilder.setInputType(InputType.feedForward(m_nIn));
    return listBuilder;
}