List of usage examples for `NeuralNetConfiguration.Builder.seed` from the package org.deeplearning4j.nn.conf.
Prototype: `long seed`
To view the source code for `org.deeplearning4j.nn.conf.NeuralNetConfiguration.Builder.seed`, click the Source Link below each example.
From source file:aiLogicImplementation.RNNBasic.java
License:Apache License
public void create(StatsStorage statsStorage) { NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder(); builder.iterations(1000);/*from w w w . j a v a 2s.com*/ builder.learningRate(0.01); builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); builder.seed(123); builder.biasInit(0.2); builder.miniBatch(true); // builder.updater(Updater.NESTEROVS); builder.updater(new Nesterovs(0.01)); builder.weightInit(WeightInit.XAVIER); builder.regularization(true); builder.l2(0.001); // builder.gradientNormalization(GradientNormalization.ClipL2PerParamType); // builder.gradientNormalizationThreshold(0.5); ListBuilder listBuilder = builder.list(); for (int i = 0; i < HIDDEN_LAYER_CONT; i++) { GravesLSTM.Builder hiddenLayerBuilder = new GravesLSTM.Builder(); hiddenLayerBuilder.nIn(i == 0 ? NUMBER_OF_FEATURE_INPUT : HIDDEN_LAYER_WIDTH); hiddenLayerBuilder.nOut(HIDDEN_LAYER_WIDTH); if (i == 0) { hiddenLayerBuilder.activation(Activation.TANH); } else { hiddenLayerBuilder.activation(Activation.TANH); } listBuilder.layer(i, hiddenLayerBuilder.build()); } // RnnOutputLayer.Builder outputLayerBuilder = new RnnOutputLayer.Builder(LossFunction.XENT); RnnOutputLayer.Builder outputLayerBuilder = new RnnOutputLayer.Builder(LossFunction.L2); // RnnOutputLayer.Builder outputLayerBuilder = new RnnOutputLayer.Builder(LossFunction.MCXENT); // outputLayerBuilder.activation(Activation.SIGMOID); outputLayerBuilder.activation(Activation.RELU); // outputLayerBuilder.activation(Activation.SOFTMAX); outputLayerBuilder.nIn(HIDDEN_LAYER_WIDTH); outputLayerBuilder.nOut(NUMBER_OF_FEATURE_OUTPUT); outputLayerBuilder.weightInit(WeightInit.XAVIER); // outputLayerBuilder.dist(new UniformDistribution(0, 1)); listBuilder.layer(HIDDEN_LAYER_CONT, outputLayerBuilder.build()); // listBuilder.backpropType(BackpropType.TruncatedBPTT); // listBuilder.tBPTTForwardLength(tbpttLength); // listBuilder.tBPTTBackwardLength(tbpttLength); listBuilder.pretrain(false); 
listBuilder.backprop(true); // create network MultiLayerConfiguration conf = listBuilder.build(); net = new MultiLayerNetwork(conf); net.init(); // net.setListeners(new ScoreIterationListener(configuration.getListenerFrequency())); //Then add the StatsListener to collect this information from the network, as it trains List list = new ArrayList(); list.add(new StatsListener(statsStorage, configuration.getListenerFrequency())); list.add(new ScoreIterationListener(configuration.getListenerFrequency())); net.setListeners(list); /* * CREATE OUR TRAINING DATA */ input = Nd4j.zeros(maze.length * maze.length * hibertMaze.getHibertMazeGUI().getAlPlayers().size() * hibertMaze.getHibertMazeGUI().getAlFoods().size(), NUMBER_OF_FEATURE_INPUT); labels = Nd4j.zeros(maze.length * maze.length * hibertMaze.getHibertMazeGUI().getAlPlayers().size() * hibertMaze.getHibertMazeGUI().getAlFoods().size(), NUMBER_OF_FEATURE_OUTPUT); positionMoving = new PositionMoving(maze); int counter = 0; for (int i = 0; i < maze.length; i++) { for (int j = 0; j < maze[i].length; j++) { int mazeFieldValue = maze[i][j]; // input neuron for current-char is 1 at "samplePos" for (Player player : hibertMaze.getHibertMazeGUI().getAlPlayers()) { for (Food food : hibertMaze.getHibertMazeGUI().getAlFoods()) { Position position = new Position(i, j); input.putScalar(counter, 0, i); input.putScalar(counter, 1, j); input.putScalar(counter, 2, mazeFieldValue); input.putScalar(counter, 3, player.getPlayerNumber()); input.putScalar(counter, 4, i); input.putScalar(counter, 5, j); input.putScalar(counter, 6, food.getFoodNumber()); input.putScalar(counter, 7, food.getPosition().getX()); input.putScalar(counter, 8, food.getPosition().getY()); input.putScalar(counter, 9, positionMoving.moveNorth(position) ? 1.00d : 0.00d); input.putScalar(counter, 10, positionMoving.moveSouth(position) ? 1.00d : 0.00d); input.putScalar(counter, 11, positionMoving.moveEast(position) ? 
1.00d : 0.00d); input.putScalar(counter, 12, positionMoving.moveWest(position) ? 1.00d : 0.00d); input.putScalar(counter, 13, player.getPosition().getX() == food.getPosition().getX() && player.getPosition().getY() == food.getPosition().getY() ? 1.00d : 0.00d); labels.putScalar(counter, 0, positionMoving.moveNorth(position) ? 1.00d : 0.00d); labels.putScalar(counter, 1, positionMoving.moveSouth(position) ? 1.00d : 0.00d); labels.putScalar(counter, 2, positionMoving.moveEast(position) ? 1.00d : 0.00d); labels.putScalar(counter, 3, positionMoving.moveWest(position) ? 1.00d : 0.00d); counter = counter + 1; } } } } trainingData = new DataSet(input, labels); }
From source file:com.javafxpert.neuralnetviz.scenario.BasicRNNExample.java
License:Apache License
public static MultiLayerNetworkEnhanced buildNetwork(WebSocketSession webSocketSession) throws Exception { //public static void main(String[] args) { // create a dedicated list of possible chars in LEARNSTRING_CHARS_LIST LinkedHashSet<Character> LEARNSTRING_CHARS = new LinkedHashSet<Character>(); for (char c : LEARNSTRING) LEARNSTRING_CHARS.add(c);/*from ww w . j a v a 2 s . c o m*/ LEARNSTRING_CHARS_LIST.addAll(LEARNSTRING_CHARS); System.out.println("LEARNSTRING_CHARS_LIST: " + LEARNSTRING_CHARS_LIST); // some common parameters NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder(); builder.iterations(10); builder.learningRate(0.001); builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); builder.seed(123); builder.biasInit(0); builder.miniBatch(false); builder.updater(Updater.RMSPROP); builder.weightInit(WeightInit.XAVIER); ListBuilder listBuilder = builder.list(); // first difference, for rnns we need to use GravesLSTM.Builder for (int i = 0; i < HIDDEN_LAYER_CONT; i++) { GravesLSTM.Builder hiddenLayerBuilder = new GravesLSTM.Builder(); hiddenLayerBuilder.nIn(i == 0 ? 
LEARNSTRING_CHARS.size() : HIDDEN_LAYER_WIDTH); hiddenLayerBuilder.nOut(HIDDEN_LAYER_WIDTH); // adopted activation function from GravesLSTMCharModellingExample // seems to work well with RNNs hiddenLayerBuilder.activation("tanh"); listBuilder.layer(i, hiddenLayerBuilder.build()); } // we need to use RnnOutputLayer for our RNN RnnOutputLayer.Builder outputLayerBuilder = new RnnOutputLayer.Builder(LossFunction.MCXENT); // softmax normalizes the output neurons, the sum of all outputs is 1 // this is required for our sampleFromDistribution-function outputLayerBuilder.activation("softmax"); outputLayerBuilder.nIn(HIDDEN_LAYER_WIDTH); outputLayerBuilder.nOut(LEARNSTRING_CHARS.size()); listBuilder.layer(HIDDEN_LAYER_CONT, outputLayerBuilder.build()); // finish builder listBuilder.pretrain(false); listBuilder.backprop(true); listBuilder.build(); // create network MultiLayerConfiguration conf = listBuilder.build(); String[] inputFeatureNames = {}; String[] outputLabelNames = {}; MultiLayerNetworkEnhanced net = new MultiLayerNetworkEnhanced(conf, inputFeatureNames, outputLabelNames); net.init(); //net.setListeners(new ScoreIterationListener(1)); net.setListeners(new ModelListener(100, webSocketSession)); /* * CREATE OUR TRAINING DATA */ // create input and output arrays: SAMPLE_INDEX, INPUT_NEURON, // SEQUENCE_POSITION INDArray input = Nd4j.zeros(1, LEARNSTRING_CHARS_LIST.size(), LEARNSTRING.length); INDArray labels = Nd4j.zeros(1, LEARNSTRING_CHARS_LIST.size(), LEARNSTRING.length); // loop through our sample-sentence int samplePos = 0; for (char currentChar : LEARNSTRING) { // small hack: when currentChar is the last, take the first char as // nextChar - not really required char nextChar = LEARNSTRING[(samplePos + 1) % (LEARNSTRING.length)]; // input neuron for current-char is 1 at "samplePos" input.putScalar(new int[] { 0, LEARNSTRING_CHARS_LIST.indexOf(currentChar), samplePos }, 1); // output neuron for next-char is 1 at "samplePos" labels.putScalar(new int[] { 0, 
LEARNSTRING_CHARS_LIST.indexOf(nextChar), samplePos }, 1); samplePos++; } DataSet trainingData = new DataSet(input, labels); // some epochs for (int epoch = 0; epoch < 100; epoch++) { System.out.println("Epoch " + epoch); // train the data net.fit(trainingData); // clear current stance from the last example net.rnnClearPreviousState(); // put the first caracter into the rrn as an initialisation INDArray testInit = Nd4j.zeros(LEARNSTRING_CHARS_LIST.size()); testInit.putScalar(LEARNSTRING_CHARS_LIST.indexOf(LEARNSTRING[0]), 1); // run one step -> IMPORTANT: rnnTimeStep() must be called, not // output() // the output shows what the net thinks what should come next INDArray output = net.rnnTimeStep(testInit); // now the net sould guess LEARNSTRING.length mor characters for (int j = 0; j < LEARNSTRING.length; j++) { // first process the last output of the network to a concrete // neuron, the neuron with the highest output cas the highest // cance to get chosen double[] outputProbDistribution = new double[LEARNSTRING_CHARS.size()]; for (int k = 0; k < outputProbDistribution.length; k++) { outputProbDistribution[k] = output.getDouble(k); } int sampledCharacterIdx = findIndexOfHighestValue(outputProbDistribution); // print the chosen output System.out.print(LEARNSTRING_CHARS_LIST.get(sampledCharacterIdx)); // use the last output as input INDArray nextInput = Nd4j.zeros(LEARNSTRING_CHARS_LIST.size()); nextInput.putScalar(sampledCharacterIdx, 1); output = net.rnnTimeStep(nextInput); } System.out.print("\n"); } return net; }
From source file:com.javafxpert.neuralnetviz.scenario.XorExample.java
License:Apache License
/**
 * Builds and trains a tiny 2-2-2 feed-forward network on the four XOR truth-table
 * rows, prints an Evaluation summary and a sample prediction, and returns the
 * trained network. Training progress is reported to the given WebSocket session.
 *
 * @param webSocketSession session the model listener reports progress to
 * @return the trained network
 * @throws Exception propagated from network construction/training
 */
public static MultiLayerNetworkEnhanced buildNetwork(WebSocketSession webSocketSession) throws Exception {
    //public static void main(String[] args) throws Exception {
    //System.out.println("In XorExample.go()");

    // list off input values, 4 training samples with data for 2 input-neurons each
    INDArray input = Nd4j.zeros(4, 2);
    //System.out.println("After INDArray input: " + input);

    // correspondending list with expected output values, 4 training samples
    // with data for 2 output-neurons each
    INDArray labels = Nd4j.zeros(4, 2);

    // create first dataset
    // when first input=0 and second input=0
    input.putScalar(new int[] { 0, 0 }, 0);
    input.putScalar(new int[] { 0, 1 }, 0);
    // then the first output fires for false, and the second is 0 (see class comment)
    labels.putScalar(new int[] { 0, 0 }, 1);
    labels.putScalar(new int[] { 0, 1 }, 0);

    // when first input=1 and second input=0
    input.putScalar(new int[] { 1, 0 }, 1);
    input.putScalar(new int[] { 1, 1 }, 0);
    // then xor is true, therefore the second output neuron fires
    labels.putScalar(new int[] { 1, 0 }, 0);
    labels.putScalar(new int[] { 1, 1 }, 1);

    // same as above
    input.putScalar(new int[] { 2, 0 }, 0);
    input.putScalar(new int[] { 2, 1 }, 1);
    labels.putScalar(new int[] { 2, 0 }, 0);
    labels.putScalar(new int[] { 2, 1 }, 1);

    // when both inputs fire, xor is false again - the first output should fire
    input.putScalar(new int[] { 3, 0 }, 1);
    input.putScalar(new int[] { 3, 1 }, 1);
    labels.putScalar(new int[] { 3, 0 }, 1);
    labels.putScalar(new int[] { 3, 1 }, 0);

    //System.out.println("Before DataSet ds");
    // create dataset object
    DataSet ds = new DataSet(input, labels);
    //System.out.println("After DataSet ds: " + ds);

    // Set up network configuration
    NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder();

    // how often should the training set be run, we need something above
    // 1000, or a higher learning-rate - found this values just by trial and error
    builder.iterations(10000);
    // learning rate
    builder.learningRate(0.1);
    // fixed seed for the random generator, so any run of this program
    // brings the same results - may not work if you do something like ds.shuffle()
    builder.seed(123);
    // not applicable, this network is to small - but for bigger networks it
    // can help that the network will not only recite the training data
    builder.useDropConnect(false);
    // a standard algorithm for moving on the error-plane, this one works
    // best for me, LINE_GRADIENT_DESCENT or CONJUGATE_GRADIENT can do the
    // job, too - it's an empirical value which one matches best to your problem
    builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
    // init the bias with 0 - empirical value, too
    builder.biasInit(0);
    // from "http://deeplearning4j.org/architecture": The networks can
    // process the input more quickly and more accurately by ingesting
    // minibatches 5-10 elements at a time in parallel.
    // this example runs better without, because the dataset is smaller than
    // the mini batch size
    builder.miniBatch(false);

    // create a multilayer network with 2 layers (including the output
    // layer, excluding the input payer)
    ListBuilder listBuilder = builder.list();

    DenseLayer.Builder hiddenLayerBuilder = new DenseLayer.Builder();
    // two input connections - simultaneously defines the number of input
    // neurons, because it's the first non-input-layer
    hiddenLayerBuilder.nIn(2);
    // number of outgooing connections, nOut simultaneously defines the
    // number of neurons in this layer
    hiddenLayerBuilder.nOut(2);
    // put the output through the sigmoid function, to cap the output
    // valuebetween 0 and 1
    hiddenLayerBuilder.activation("sigmoid");
    // random initialize weights with values between 0 and 1
    hiddenLayerBuilder.weightInit(WeightInit.DISTRIBUTION);
    hiddenLayerBuilder.dist(new UniformDistribution(0, 1));
    // build and set as layer 0
    listBuilder.layer(0, hiddenLayerBuilder.build());

    // MCXENT or NEGATIVELOGLIKELIHOOD work ok for this example - this
    // function calculates the error-value
    // From homepage: Your net's purpose will determine the loss funtion you
    // use. For pretraining, choose reconstruction entropy. For
    // classification, use multiclass cross entropy.
    Builder outputLayerBuilder = new Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD);
    // must be the same amout as neurons in the layer before
    outputLayerBuilder.nIn(2);
    // two neurons in this layer
    outputLayerBuilder.nOut(2);
    outputLayerBuilder.activation("sigmoid");
    outputLayerBuilder.weightInit(WeightInit.DISTRIBUTION);
    outputLayerBuilder.dist(new UniformDistribution(0, 1));
    listBuilder.layer(1, outputLayerBuilder.build());

    // no pretrain phase for this network
    listBuilder.pretrain(false);

    // seems to be mandatory
    // according to agibsonccc: You typically only use that with
    // pretrain(true) when you want to do pretrain/finetune without changing
    // the previous layers finetuned weights that's for autoencoders and rbms
    listBuilder.backprop(true);

    // build and init the network, will check if everything is configured correct
    MultiLayerConfiguration conf = listBuilder.build();
    String[] inputFeatureNames = { "true (1) or false (0)", "true (1) or false (0)" };
    String[] outputLabelNames = { "false", "true" };
    MultiLayerNetworkEnhanced net = new MultiLayerNetworkEnhanced(conf, inputFeatureNames, outputLabelNames);
    net.init();

    // add an listener which outputs the error every 100 parameter updates
    //net.setListeners(new ScoreIterationListener(100));
    net.setListeners(new ModelListener(100, webSocketSession));

    // C&P from GravesLSTMCharModellingExample
    // Print the number of parameters in the network (and for each layer)
    // NOTE(review): the println calls below are commented out, so this loop only
    // accumulates totalNumParams, which is never read afterwards.
    Layer[] layers = net.getLayers();
    int totalNumParams = 0;
    for (int i = 0; i < layers.length; i++) {
        int nParams = layers[i].numParams();
        //System.out.println("Number of parameters in layer " + i + ": " + nParams);
        totalNumParams += nParams;
    }
    //System.out.println("Total number of network parameters: " + totalNumParams);

    // here the actual learning takes place
    net.fit(ds);

    // create output for every training sample
    INDArray output = net.output(ds.getFeatureMatrix());
    //System.out.println("output: " + output);

    // NOTE(review): with the println commented out this loop has no observable
    // effect; kept for parity with the original debugging code.
    for (int i = 0; i < output.rows(); i++) {
        String actual = ds.getLabels().getRow(i).toString().trim();
        String predicted = output.getRow(i).toString().trim();
        //System.out.println("actual " + actual + " vs predicted " + predicted);
    }

    // let Evaluation prints stats how often the right output had the highest value
    Evaluation eval = new Evaluation(2);
    eval.eval(ds.getLabels(), output);
    System.out.println(eval.stats());

    //displayNetwork(net);

    // Make prediction
    INDArray example = Nd4j.zeros(1, 2);
    // create first dataset
    // when first input=0 and second input=0
    example.putScalar(new int[] { 0, 0 }, 0);
    example.putScalar(new int[] { 0, 1 }, 1);
    int[] prediction = net.predict(example);
    System.out.println("prediction for 0, 1: " + prediction[0]);

    return net;
}
From source file:com.sliit.neuralnetwork.RecurrentNN.java
public void buildModel() { System.out.println("Build model...."); iterations = outputNum + 1;//from w w w.j a v a 2s. c om NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder(); builder.iterations(iterations); builder.learningRate(0.001); // builder.momentum(0.01); builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); builder.seed(seed); builder.biasInit(1); builder.regularization(true).l2(1e-5); builder.updater(Updater.RMSPROP); builder.weightInit(WeightInit.XAVIER); NeuralNetConfiguration.ListBuilder list = builder.list(); for (int i = 0; i < HIDDEN_LAYER_COUNT; i++) { GravesLSTM.Builder hiddenLayerBuilder = new GravesLSTM.Builder(); hiddenLayerBuilder.nIn(i == 0 ? inputs : numHiddenNodes); hiddenLayerBuilder.nOut(numHiddenNodes); hiddenLayerBuilder.activation("tanh"); list.layer(i, hiddenLayerBuilder.build()); } RnnOutputLayer.Builder outputLayer = new RnnOutputLayer.Builder(LossFunction.MCXENT); outputLayer.activation("softmax"); outputLayer.nIn(numHiddenNodes); outputLayer.nOut(outputNum); list.layer(HIDDEN_LAYER_COUNT, outputLayer.build()); list.pretrain(false); list.backprop(true); MultiLayerConfiguration configuration = list.build(); model = new MultiLayerNetwork(configuration); model.init(); //model.setListeners(new ScoreIterationListener(1)); }
From source file:org.knime.ext.dl4j.base.mln.MultiLayerNetFactory.java
License:Open Source License
/**
 * Creates a {@link org.deeplearning4j.nn.conf.NeuralNetConfiguration.ListBuilder} using the specified
 * list of layers and the parameter values held in this factory's members. Make sure the
 * <code>init()</code> method was called before calling this method if values other than the
 * defaults should be used for the network parameters.
 *
 * @param layers the layers which should be used to create the network
 * @return builder with set layers and parameters
 */
protected NeuralNetConfiguration.ListBuilder createListBuilderWithLearnerParameters(final List<Layer> layers) {
    final NeuralNetConfiguration.Builder nnConfigBuilder = new NeuralNetConfiguration.Builder();
    /*
     * Need to overwrite global parameters for each layer separately as setting the parameter
     * in the NeuralNetConfiguration does not overwrite it when it was already set in the
     * specific layer. Hence, need to clone layers to not alter original layers.
     */
    final List<Layer> layersCopy = DLModelPortObjectUtils.cloneLayers(layers);
    if (m_useGlobalDropOut) {
        overwriteDropOut(layersCopy, m_dropOut);
        nnConfigBuilder.dropOut(m_dropOut);
    }
    if (m_useGlobalWeightInit) {
        overwriteWeightInit(layersCopy, m_weightInit);
        nnConfigBuilder.weightInit(m_weightInit);
        // a distribution is only relevant when weight init is the DISTRIBUTION mode
        if (m_weightInit.equals(ParameterUtils.DISTRIBUTION_PARAMETER_CONDITION)) {
            Distribution dist;
            switch (m_distribution) {
            case BINOMIAL:
                dist = new BinomialDistribution(m_distributionBinomialTrails, m_distributionBinomialProbability);
                break;
            case NORMAL:
                dist = new NormalDistribution(m_distributionMean, m_distributionSTD);
                break;
            case UNIFORM:
                dist = new UniformDistribution(m_distributionLowerBound, m_distributionUpperBound);
                break;
            default:
                throw new IllegalArgumentException("No case defined for DL4JDistribution: " + m_distribution);
            }
            nnConfigBuilder.dist(dist);
        }
    }
    if (m_useGlobalLearningRate) {
        overwriteLearningRate(layersCopy, m_learningRate);
    }
    //setup number of input and output neurons
    ConfigurationUtils.setupLayers(layersCopy, m_nIn);
    if (m_useSeed) {
        nnConfigBuilder.seed(m_seed);
    }
    if (m_useGradientNormalization) {
        nnConfigBuilder.gradientNormalization(m_gradientNormalization);
        nnConfigBuilder.gradientNormalizationThreshold(m_gradientNormalizationThreshold);
    }
    if (m_useRegularization) {
        nnConfigBuilder.regularization(true);
        nnConfigBuilder.l1(m_l1);
        nnConfigBuilder.l2(m_l2);
    }
    // momentum moved to updaters, will only be used with NESTEROVS
    // if (m_useMomentum) {
    //     nnConfigBuilder.momentum(m_momentum);
    //     nnConfigBuilder.momentumAfter(m_momentumAfter);
    // }
    if (m_useDropConnect) {
        nnConfigBuilder.useDropConnect(true);
    }
    // the learning rate policy behaves strange, need to revise conditions if we want to add it
    // if (m_useAdvancedLearningRate) {
    //     nnConfigBuilder.learningRateDecayPolicy(m_lrPolicy);
    //     nnConfigBuilder.lrPolicyDecayRate(m_lrPolicyDecayRate);
    //     if (m_lrPolicy.equals(ParameterUtils.LR_POWER_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.lrPolicyPower(m_lrPolicyPower);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_SCHEDULE_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.learningRateSchedule(m_learningRateAfter);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_SCORE_BASED_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.learningRateScoreBasedDecayRate(m_lrPolicyScoreDecayRate);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_STEPS_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.lrPolicySteps(m_lrPolicySteps);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_EXPONENTIAL_PARAMETER_CONDITION)) {
    //         //no extra param
    //     }
    // }
    if (m_useBiasInit) {
        nnConfigBuilder.biasInit(m_biasInit);
    }
    if (m_useBiasLearningRate) {
        overwriteBiasLearningRate(layersCopy, m_biasLearningRate);
    }
    nnConfigBuilder.iterations(m_iterations);
    nnConfigBuilder.updater(m_updater);
    // updater-specific hyperparameters: only the ones matching the selected updater apply
    if (m_updater.equals(ParameterUtils.ADAM_PARAMETER_CONDITION)) {
        nnConfigBuilder.adamMeanDecay(m_adamMeanDecay);
        nnConfigBuilder.adamVarDecay(m_adamVarDecay);
    } else if (m_updater.equals(ParameterUtils.ADADELTA_PARAMETER_CONDITION)) {
        nnConfigBuilder.rho(m_adadeltaRho);
    } else if (m_updater.equals(ParameterUtils.RMSPROP_PARAMETER_CONDITION)) {
        nnConfigBuilder.rmsDecay(m_rmsDecay);
    } else if (m_updater.equals(ParameterUtils.NESTEROVS_PARAMETER_CONDITION)) {
        nnConfigBuilder.momentum(m_momentum);
        nnConfigBuilder.momentumAfter(m_momentumAfter);
    }
    //The new dialogs have a use updater checkbox which is not present in the old dialogs and is true by default. Therefore,
    //we overwrite the updater with the default values, if the checkbox is not checked. For the old dialog this will not happen
    //as the value is true by default.
    if (!m_useUpdater) {
        nnConfigBuilder.updater(Updater.valueOf(LearnerParameter.DEFAULT_UPDATER));
        nnConfigBuilder.momentum(LearnerParameter.DEFAULT_MOMENTUM);
    }
    nnConfigBuilder.optimizationAlgo(m_optimization);
    if (ParameterUtils.MAX_LINE_SEARCH_ITERATIONS_CONDITION.contains(m_optimization)) {
        nnConfigBuilder.maxNumLineSearchIterations(m_maxNumLineSearchIterations);
    }
    //very strange dl4j behaviour, for unsupervised layers (RBM, Autoencoder) we need
    //to modify the step function elswise learning does not work (error does not decrease)
    //nnConfigBuilder.stepFunction(new DefaultStepFunction());
    final NeuralNetConfiguration.ListBuilder listBuilder = nnConfigBuilder.list();
    int currentLayerIndex = 0;
    for (final Layer layer : layersCopy) {
        listBuilder.layer(currentLayerIndex, layer);
        currentLayerIndex++;
    }
    listBuilder.pretrain(m_usePretrain);
    listBuilder.backprop(m_useBackprop);
    return listBuilder;
}
From source file:org.knime.ext.dl4j.base.mln.MultiLayerNetFactory2.java
License:Open Source License
/**
 * Creates a {@link org.deeplearning4j.nn.conf.NeuralNetConfiguration.ListBuilder} using the specified
 * list of layers and the parameter values read from the given settings models.
 *
 * @param layers the layers which should be used to create the network
 * @param lp settings models holding all learner parameter values
 * @return builder with set layers and parameters
 */
protected NeuralNetConfiguration.ListBuilder createListBuilderWithLearnerParameters(final List<Layer> layers,
        final LearnerParameterSettingsModels2 lp) {
    // Read every parameter out of the settings models up front; the m_ prefix is kept
    // on the locals to mirror the member-based variant of this method.
    //boolean
    boolean m_useGlobalDropOut = lp.getBoolean(LearnerParameter.USE_GLOBAL_DROP_OUT, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useGlobalWeightInit = lp.getBoolean(LearnerParameter.USE_GLOBAL_WEIGHT_INIT, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useGlobalLearningRate = lp.getBoolean(LearnerParameter.USE_GLOBAL_LEARNING_RATE, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useDropConnect = lp.getBoolean(LearnerParameter.USE_DROP_CONNECT, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useBackprop = lp.getBoolean(LearnerParameter.USE_BACKPROP, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useSeed = lp.getBoolean(LearnerParameter.USE_SEED, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useGradientNormalization = lp.getBoolean(LearnerParameter.USE_GRADIENT_NORMALIZATION, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useRegularization = lp.getBoolean(LearnerParameter.USE_REGULARIZATION, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useBiasInit = lp.getBoolean(LearnerParameter.USE_BIAS_INIT, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useBiasLearningRate = lp.getBoolean(LearnerParameter.USE_BIAS_LEARNING_RATE, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useUpdater = lp.getBoolean(LearnerParameter.USE_UPDATER, LearnerParameter.DEFAULT_USE_UPDATER);
    boolean m_usePretrain = lp.getBoolean(LearnerParameter.USE_PRETRAIN, LearnerParameter.DEFAULT_BOOLEAN);
    //double
    double m_dropOut = lp.getDouble(LearnerParameter.GLOBAL_DROP_OUT, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionBinomialProbability = lp.getDouble(LearnerParameter.DISTRIBUTION_BINOMIAL_PROBABILITY, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionMean = lp.getDouble(LearnerParameter.DISTRIBUTION_MEAN, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionSTD = lp.getDouble(LearnerParameter.DISTRIBUTION_STD, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionLowerBound = lp.getDouble(LearnerParameter.DISTRIBUTION_LOWER_BOUND, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionUpperBound = lp.getDouble(LearnerParameter.DISTRIBUTION_UPPER_BOUND, LearnerParameter.DEFAULT_DOUBLE);
    double m_learningRate = lp.getDouble(LearnerParameter.GLOBAL_LEARNING_RATE, LearnerParameter.DEFAULT_LEARNING_RATE);
    double m_gradientNormalizationThreshold = lp.getDouble(LearnerParameter.GRADIENT_NORMALIZATION_THRESHOLD, LearnerParameter.DEFAULT_GRADIENT_NORMALIZATION_THRESHOLD);
    double m_l1 = lp.getDouble(LearnerParameter.L1, LearnerParameter.DEFAULT_DOUBLE);
    double m_l2 = lp.getDouble(LearnerParameter.L2, LearnerParameter.DEFAULT_DOUBLE);
    double m_biasInit = lp.getDouble(LearnerParameter.BIAS_INIT, LearnerParameter.DEFAULT_DOUBLE);
    double m_biasLearningRate = lp.getDouble(LearnerParameter.BIAS_LEARNING_RATE, LearnerParameter.DEFAULT_DOUBLE);
    double m_adamMeanDecay = lp.getDouble(LearnerParameter.ADAM_MEAN_DECAY, LearnerParameter.DEFAULT_ADAM_MEAN_DECAY);
    double m_adamVarDecay = lp.getDouble(LearnerParameter.ADAM_VAR_DECAY, LearnerParameter.DEFAULT_ADAM_VAR_DECAY);
    double m_adadeltaRho = lp.getDouble(LearnerParameter.ADADELTA_RHO, LearnerParameter.DEFAULT_ADADELTA_RHO);
    double m_rmsDecay = lp.getDouble(LearnerParameter.RMS_DECAY, LearnerParameter.DEFAULT_RMS_DECAY);
    double m_momentum = lp.getDouble(LearnerParameter.MOMENTUM, LearnerParameter.DEFAULT_MOMENTUM);
    //int
    Integer m_distributionBinomialTrails = lp.getInteger(LearnerParameter.DISTRIBUTION_BINOMIAL_TRAILS, LearnerParameter.DEFAULT_INT);
    Integer m_seed = lp.getInteger(LearnerParameter.SEED, LearnerParameter.DEFAULT_INT);
    Integer m_iterations = lp.getInteger(LearnerParameter.TRAINING_ITERATIONS, LearnerParameter.DEFAULT_INT);
    Integer m_maxNumLineSearchIterations = lp.getInteger(LearnerParameter.MAX_NUMBER_LINE_SEARCH_ITERATIONS, LearnerParameter.DEFAULT_INT);
    //string
    WeightInit m_weightInit = WeightInit
            .valueOf(lp.getString(LearnerParameter.GLOBAL_WEIGHT_INIT, LearnerParameter.DEFAULT_WEIGHT_INIT));
    DL4JDistribution m_distribution = DL4JDistribution
            .valueOf(lp.getString(LearnerParameter.DISTRIBUTION, LearnerParameter.DEFAULT_DISTRIBUTION));
    GradientNormalization m_gradientNormalization = DL4JGradientNormalization
            .fromToString(lp.getString(LearnerParameter.GRADIENT_NORMALIZATION,
                    LearnerParameter.DEFAULT_GRADIENT_NORMALIZATION))
            .getDL4JValue();
    Updater m_updater = Updater
            .valueOf(lp.getString(LearnerParameter.UPDATER, LearnerParameter.DEFAULT_UPDATER));
    Map<Integer, Double> m_momentumAfter = ParameterUtils
            .convertStringToMap(lp.getString(LearnerParameter.MOMENTUM_AFTER, LearnerParameter.DEFAULT_MAP));
    OptimizationAlgorithm m_optimization = DL4JOptimizationAlgorithm.fromToString(
            lp.getString(LearnerParameter.OPTIMIZATION_ALGORITHM, LearnerParameter.DEFAULT_OPTIMIZATION))
            .getDL4JValue();

    final NeuralNetConfiguration.Builder nnConfigBuilder = new NeuralNetConfiguration.Builder();
    /*
     * Since 0.9.1: Use DL4J workspaces which makes training faster and we can reduce the number of GC calls of DL4J.
     * For now use WorkspaceMode.SEPARATE for both training and inference.
     */
    nnConfigBuilder.inferenceWorkspaceMode(WorkspaceMode.SEPARATE);
    nnConfigBuilder.trainingWorkspaceMode(WorkspaceMode.SEPARATE);
    /*
     * Need to overwrite global parameters for each layer separately as setting the parameter
     * in the NeuralNetConfiguration does not overwrite it when it was already set in the
     * specific layer. Hence, need to clone layers to not alter original layers.
     */
    final List<Layer> layersCopy = DLModelPortObjectUtils.cloneLayers(layers);
    if (m_useGlobalDropOut) {
        overwriteDropOut(layersCopy, m_dropOut);
        nnConfigBuilder.dropOut(m_dropOut);
    }
    if (m_useGlobalWeightInit) {
        overwriteWeightInit(layersCopy, m_weightInit);
        nnConfigBuilder.weightInit(m_weightInit);
        // a distribution is only relevant when weight init is the DISTRIBUTION mode
        if (m_weightInit.equals(ParameterUtils.DISTRIBUTION_PARAMETER_CONDITION)) {
            Distribution dist;
            switch (m_distribution) {
            case BINOMIAL:
                dist = new BinomialDistribution(m_distributionBinomialTrails, m_distributionBinomialProbability);
                break;
            case NORMAL:
                dist = new NormalDistribution(m_distributionMean, m_distributionSTD);
                break;
            case UNIFORM:
                dist = new UniformDistribution(m_distributionLowerBound, m_distributionUpperBound);
                break;
            default:
                throw new IllegalArgumentException("No case defined for DL4JDistribution: " + m_distribution);
            }
            nnConfigBuilder.dist(dist);
        }
    }
    if (m_useGlobalLearningRate) {
        overwriteLearningRate(layersCopy, m_learningRate);
    }
    //setup number of input and output neurons
    //ConfigurationUtils.setupLayers(layersCopy, m_nIn); //substituted with 'setInputType(InputType.feedForward(m_nIn)'
    if (m_useSeed) {
        nnConfigBuilder.seed(m_seed);
    }
    if (m_useGradientNormalization) {
        nnConfigBuilder.gradientNormalization(m_gradientNormalization);
        nnConfigBuilder.gradientNormalizationThreshold(m_gradientNormalizationThreshold);
    }
    if (m_useRegularization) {
        nnConfigBuilder.regularization(true);
        nnConfigBuilder.l1(m_l1);
        nnConfigBuilder.l2(m_l2);
    }
    // momentum moved to updaters, will only be used with NESTEROVS
    // if (m_useMomentum) {
    //     nnConfigBuilder.momentum(m_momentum);
    //     nnConfigBuilder.momentumAfter(m_momentumAfter);
    // }
    if (m_useDropConnect) {
        nnConfigBuilder.useDropConnect(true);
    }
    // the learning rate policy behaves strange, need to revise conditions if we want to add it
    // if (m_useAdvancedLearningRate) {
    //     nnConfigBuilder.learningRateDecayPolicy(m_lrPolicy);
    //     nnConfigBuilder.lrPolicyDecayRate(m_lrPolicyDecayRate);
    //     if (m_lrPolicy.equals(ParameterUtils.LR_POWER_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.lrPolicyPower(m_lrPolicyPower);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_SCHEDULE_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.learningRateSchedule(m_learningRateAfter);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_SCORE_BASED_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.learningRateScoreBasedDecayRate(m_lrPolicyScoreDecayRate);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_STEPS_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.lrPolicySteps(m_lrPolicySteps);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_EXPONENTIAL_PARAMETER_CONDITION)) {
    //         //no extra param
    //     }
    // }
    if (m_useBiasInit) {
        nnConfigBuilder.biasInit(m_biasInit);
    }
    if (m_useBiasLearningRate) {
        overwriteBiasLearningRate(layersCopy, m_biasLearningRate);
    }
    nnConfigBuilder.iterations(m_iterations);
    nnConfigBuilder.updater(m_updater);
    // updater-specific hyperparameters: only the ones matching the selected updater apply
    if (m_updater.equals(ParameterUtils.ADAM_PARAMETER_CONDITION)) {
        nnConfigBuilder.adamMeanDecay(m_adamMeanDecay);
        nnConfigBuilder.adamVarDecay(m_adamVarDecay);
    } else if (m_updater.equals(ParameterUtils.ADADELTA_PARAMETER_CONDITION)) {
        nnConfigBuilder.rho(m_adadeltaRho);
    } else if (m_updater.equals(ParameterUtils.RMSPROP_PARAMETER_CONDITION)) {
        nnConfigBuilder.rmsDecay(m_rmsDecay);
    } else if (m_updater.equals(ParameterUtils.NESTEROVS_PARAMETER_CONDITION)) {
        nnConfigBuilder.momentum(m_momentum);
        nnConfigBuilder.momentumAfter(m_momentumAfter);
    }
    //The new dialogs have a use updater checkbox which is not present in the old dialogs and is true by default. Therefore,
    //we overwrite the updater with the default values, if the checkbox is not checked. For the old dialog this will not happen
    //as the value is true by default.
    if (!m_useUpdater) {
        nnConfigBuilder.updater(Updater.valueOf(LearnerParameter.DEFAULT_UPDATER));
        nnConfigBuilder.momentum(LearnerParameter.DEFAULT_MOMENTUM);
    }
    nnConfigBuilder.optimizationAlgo(m_optimization);
    if (ParameterUtils.MAX_LINE_SEARCH_ITERATIONS_CONDITION.contains(m_optimization)) {
        nnConfigBuilder.maxNumLineSearchIterations(m_maxNumLineSearchIterations);
    }
    //very strange dl4j behaviour, for unsupervised layers (RBM, Autoencoder) we need
    //to modify the step function elswise learning does not work (error does not decrease)
    //nnConfigBuilder.stepFunction(new DefaultStepFunction());
    final NeuralNetConfiguration.ListBuilder listBuilder = nnConfigBuilder.list();
    int currentLayerIndex = 0;
    for (final Layer layer : layersCopy) {
        listBuilder.layer(currentLayerIndex, layer);
        currentLayerIndex++;
    }
    listBuilder.pretrain(m_usePretrain);
    listBuilder.backprop(m_useBackprop);
    //infer correct number of inputs and outputs for each layer by using the number of inputs of the first layer
    //and the number of outputs for the following ones
    listBuilder.setInputType(InputType.feedForward(m_nIn));
    return listBuilder;
}