List of usage examples for org.deeplearning4j.nn.conf.distribution NormalDistribution NormalDistribution
@JsonCreator public NormalDistribution(@JsonProperty("mean") double mean, @JsonProperty("std") double std)
From source file:org.eigengo.rsa.identity.v100.AlexNet.java
License:Open Source License
public MultiLayerConfiguration conf() { double nonZeroBias = 1; double dropOut = 0.5; SubsamplingLayer.PoolingType poolingType = SubsamplingLayer.PoolingType.MAX; // TODO split and link kernel maps on GPUs - 2nd, 4th, 5th convolution should only connect maps on the same gpu, 3rd connects to all in 2nd MultiLayerConfiguration.Builder conf = new NeuralNetConfiguration.Builder().seed(seed) .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0.0, 0.01)).activation("relu") .updater(Updater.NESTEROVS).iterations(iterations) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) // normalize to prevent vanishing or exploding gradients .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1e-2) .biasLearningRate(1e-2 * 2).learningRateDecayPolicy(LearningRatePolicy.Step).lrPolicyDecayRate(0.1) .lrPolicySteps(100000).regularization(true).l2(5 * 1e-4).momentum(0.9).miniBatch(false).list() .layer(0,//from ww w . j ava2s.co m new ConvolutionLayer.Builder(new int[] { 11, 11 }, new int[] { 4, 4 }, new int[] { 3, 3 }) .name("cnn1").nIn(channels).nOut(96).build()) .layer(1, new LocalResponseNormalization.Builder().name("lrn1").build()) .layer(2, new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 }) .name("maxpool1").build()) .layer(3, new ConvolutionLayer.Builder(new int[] { 5, 5 }, new int[] { 1, 1 }, new int[] { 2, 2 }) .name("cnn2").nOut(256).biasInit(nonZeroBias).build()) .layer(4, new LocalResponseNormalization.Builder().name("lrn2").k(2).n(5).alpha(1e-4).beta(0.75) .build()) .layer(5, new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 }) .name("maxpool2").build()) .layer(6, new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 }) .name("cnn3").nOut(384).build()) .layer(7, new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 }) .name("cnn4").nOut(384).biasInit(nonZeroBias).build()) .layer(8, new 
ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 }) .name("cnn5").nOut(256).biasInit(nonZeroBias).build()) .layer(9, new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 }) .name("maxpool3").build()) .layer(10, new DenseLayer.Builder().name("ffn1").nOut(4096).dist(new GaussianDistribution(0, 0.005)) .biasInit(nonZeroBias).dropOut(dropOut).build()) .layer(11, new DenseLayer.Builder().name("ffn2").nOut(4096).dist(new GaussianDistribution(0, 0.005)) .biasInit(nonZeroBias).dropOut(dropOut).build()) .layer(12, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).name("output") .nOut(numLabels).activation("softmax").build()) .backprop(true).pretrain(false).cnnInputSize(height, width, channels); return conf.build(); }
From source file:org.knime.ext.dl4j.base.mln.MultiLayerNetFactory.java
License:Open Source License
/**
 * Creates a {@link org.deeplearning4j.nn.conf.NeuralNetConfiguration.ListBuilder} using the specified list of layers
 * and parameter values from members. Make sure that the <code>init()</code> method was called before
 * calling this method if values other than the defaults should be used for the network parameters.
 *
 * @param layers the layers which should be used to create the network
 * @return builder with set layers and parameters
 */
protected NeuralNetConfiguration.ListBuilder createListBuilderWithLearnerParameters(final List<Layer> layers) {
    final NeuralNetConfiguration.Builder nnConfigBuilder = new NeuralNetConfiguration.Builder();
    /*
     * Need to overwrite global parameters for each layer separately as setting the parameter
     * in the NeuralNetConfiguration does not overwrite it when it was already set in the
     * specific layer. Hence, need to clone layers to not alter original layers.
     */
    final List<Layer> layersCopy = DLModelPortObjectUtils.cloneLayers(layers);
    if (m_useGlobalDropOut) {
        overwriteDropOut(layersCopy, m_dropOut);
        nnConfigBuilder.dropOut(m_dropOut);
    }
    if (m_useGlobalWeightInit) {
        overwriteWeightInit(layersCopy, m_weightInit);
        nnConfigBuilder.weightInit(m_weightInit);
        // DISTRIBUTION weight init needs an explicit Distribution instance built from the dialog settings.
        if (m_weightInit.equals(ParameterUtils.DISTRIBUTION_PARAMETER_CONDITION)) {
            Distribution dist;
            switch (m_distribution) {
                case BINOMIAL:
                    // NOTE(review): "Trails" is presumably a typo for "Trials" kept for compatibility.
                    dist = new BinomialDistribution(m_distributionBinomialTrails, m_distributionBinomialProbability);
                    break;
                case NORMAL:
                    dist = new NormalDistribution(m_distributionMean, m_distributionSTD);
                    break;
                case UNIFORM:
                    dist = new UniformDistribution(m_distributionLowerBound, m_distributionUpperBound);
                    break;
                default:
                    throw new IllegalArgumentException("No case defined for DL4JDistribution: " + m_distribution);
            }
            nnConfigBuilder.dist(dist);
        }
    }
    if (m_useGlobalLearningRate) {
        overwriteLearningRate(layersCopy, m_learningRate);
    }
    // setup number of input and output neurons
    ConfigurationUtils.setupLayers(layersCopy, m_nIn);
    if (m_useSeed) {
        nnConfigBuilder.seed(m_seed);
    }
    if (m_useGradientNormalization) {
        nnConfigBuilder.gradientNormalization(m_gradientNormalization);
        nnConfigBuilder.gradientNormalizationThreshold(m_gradientNormalizationThreshold);
    }
    if (m_useRegularization) {
        nnConfigBuilder.regularization(true);
        nnConfigBuilder.l1(m_l1);
        nnConfigBuilder.l2(m_l2);
    }
    // momentum moved to updaters, will only be used with NESTEROVS
    // if (m_useMomentum) {
    //     nnConfigBuilder.momentum(m_momentum);
    //     nnConfigBuilder.momentumAfter(m_momentumAfter);
    // }
    if (m_useDropConnect) {
        nnConfigBuilder.useDropConnect(true);
    }
    // the learning rate policy behaves strangely, need to revise conditions if we want to add it
    // if (m_useAdvancedLearningRate) {
    //     nnConfigBuilder.learningRateDecayPolicy(m_lrPolicy);
    //     nnConfigBuilder.lrPolicyDecayRate(m_lrPolicyDecayRate);
    //     if (m_lrPolicy.equals(ParameterUtils.LR_POWER_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.lrPolicyPower(m_lrPolicyPower);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_SCHEDULE_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.learningRateSchedule(m_learningRateAfter);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_SCORE_BASED_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.learningRateScoreBasedDecayRate(m_lrPolicyScoreDecayRate);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_STEPS_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.lrPolicySteps(m_lrPolicySteps);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_EXPONENTIAL_PARAMETER_CONDITION)) {
    //         //no extra param
    //     }
    // }
    if (m_useBiasInit) {
        nnConfigBuilder.biasInit(m_biasInit);
    }
    if (m_useBiasLearningRate) {
        overwriteBiasLearningRate(layersCopy, m_biasLearningRate);
    }
    nnConfigBuilder.iterations(m_iterations);
    nnConfigBuilder.updater(m_updater);
    // Each updater only accepts its own hyper-parameters, so branch on the configured updater.
    if (m_updater.equals(ParameterUtils.ADAM_PARAMETER_CONDITION)) {
        nnConfigBuilder.adamMeanDecay(m_adamMeanDecay);
        nnConfigBuilder.adamVarDecay(m_adamVarDecay);
    } else if (m_updater.equals(ParameterUtils.ADADELTA_PARAMETER_CONDITION)) {
        nnConfigBuilder.rho(m_adadeltaRho);
    } else if (m_updater.equals(ParameterUtils.RMSPROP_PARAMETER_CONDITION)) {
        nnConfigBuilder.rmsDecay(m_rmsDecay);
    } else if (m_updater.equals(ParameterUtils.NESTEROVS_PARAMETER_CONDITION)) {
        nnConfigBuilder.momentum(m_momentum);
        nnConfigBuilder.momentumAfter(m_momentumAfter);
    }
    // The new dialogs have a use updater checkbox which is not present in the old dialogs and is true by default. Therefore,
    // we overwrite the updater with the default values, if the checkbox is not checked. For the old dialog this will not happen
    // as the value is true by default.
    if (!m_useUpdater) {
        nnConfigBuilder.updater(Updater.valueOf(LearnerParameter.DEFAULT_UPDATER));
        nnConfigBuilder.momentum(LearnerParameter.DEFAULT_MOMENTUM);
    }
    nnConfigBuilder.optimizationAlgo(m_optimization);
    if (ParameterUtils.MAX_LINE_SEARCH_ITERATIONS_CONDITION.contains(m_optimization)) {
        nnConfigBuilder.maxNumLineSearchIterations(m_maxNumLineSearchIterations);
    }
    // very strange dl4j behaviour, for unsupervised layers (RBM, Autoencoder) we need
    // to modify the step function otherwise learning does not work (error does not decrease)
    //nnConfigBuilder.stepFunction(new DefaultStepFunction());
    final NeuralNetConfiguration.ListBuilder listBuilder = nnConfigBuilder.list();
    // Add the (possibly overwritten) layer copies in their original order.
    int currentLayerIndex = 0;
    for (final Layer layer : layersCopy) {
        listBuilder.layer(currentLayerIndex, layer);
        currentLayerIndex++;
    }
    listBuilder.pretrain(m_usePretrain);
    listBuilder.backprop(m_useBackprop);
    return listBuilder;
}
From source file:org.knime.ext.dl4j.base.mln.MultiLayerNetFactory2.java
License:Open Source License
/**
 * Creates a {@link org.deeplearning4j.nn.conf.NeuralNetConfiguration.ListBuilder} using the specified list of layers
 * and parameters.
 *
 * @param layers the layers which should be used to create the network
 * @param lp the learner parameter settings to read all network hyper-parameters from
 * @return builder with set layers and parameters
 */
protected NeuralNetConfiguration.ListBuilder createListBuilderWithLearnerParameters(final List<Layer> layers,
        final LearnerParameterSettingsModels2 lp) {
    // boolean parameters
    boolean m_useGlobalDropOut = lp.getBoolean(LearnerParameter.USE_GLOBAL_DROP_OUT, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useGlobalWeightInit = lp.getBoolean(LearnerParameter.USE_GLOBAL_WEIGHT_INIT, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useGlobalLearningRate = lp.getBoolean(LearnerParameter.USE_GLOBAL_LEARNING_RATE, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useDropConnect = lp.getBoolean(LearnerParameter.USE_DROP_CONNECT, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useBackprop = lp.getBoolean(LearnerParameter.USE_BACKPROP, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useSeed = lp.getBoolean(LearnerParameter.USE_SEED, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useGradientNormalization = lp.getBoolean(LearnerParameter.USE_GRADIENT_NORMALIZATION, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useRegularization = lp.getBoolean(LearnerParameter.USE_REGULARIZATION, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useBiasInit = lp.getBoolean(LearnerParameter.USE_BIAS_INIT, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useBiasLearningRate = lp.getBoolean(LearnerParameter.USE_BIAS_LEARNING_RATE, LearnerParameter.DEFAULT_BOOLEAN);
    boolean m_useUpdater = lp.getBoolean(LearnerParameter.USE_UPDATER, LearnerParameter.DEFAULT_USE_UPDATER);
    boolean m_usePretrain = lp.getBoolean(LearnerParameter.USE_PRETRAIN, LearnerParameter.DEFAULT_BOOLEAN);
    // double parameters
    double m_dropOut = lp.getDouble(LearnerParameter.GLOBAL_DROP_OUT, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionBinomialProbability =
        lp.getDouble(LearnerParameter.DISTRIBUTION_BINOMIAL_PROBABILITY, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionMean = lp.getDouble(LearnerParameter.DISTRIBUTION_MEAN, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionSTD = lp.getDouble(LearnerParameter.DISTRIBUTION_STD, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionLowerBound = lp.getDouble(LearnerParameter.DISTRIBUTION_LOWER_BOUND, LearnerParameter.DEFAULT_DOUBLE);
    double m_distributionUpperBound = lp.getDouble(LearnerParameter.DISTRIBUTION_UPPER_BOUND, LearnerParameter.DEFAULT_DOUBLE);
    double m_learningRate = lp.getDouble(LearnerParameter.GLOBAL_LEARNING_RATE, LearnerParameter.DEFAULT_LEARNING_RATE);
    double m_gradientNormalizationThreshold = lp.getDouble(LearnerParameter.GRADIENT_NORMALIZATION_THRESHOLD,
        LearnerParameter.DEFAULT_GRADIENT_NORMALIZATION_THRESHOLD);
    double m_l1 = lp.getDouble(LearnerParameter.L1, LearnerParameter.DEFAULT_DOUBLE);
    double m_l2 = lp.getDouble(LearnerParameter.L2, LearnerParameter.DEFAULT_DOUBLE);
    double m_biasInit = lp.getDouble(LearnerParameter.BIAS_INIT, LearnerParameter.DEFAULT_DOUBLE);
    double m_biasLearningRate = lp.getDouble(LearnerParameter.BIAS_LEARNING_RATE, LearnerParameter.DEFAULT_DOUBLE);
    double m_adamMeanDecay = lp.getDouble(LearnerParameter.ADAM_MEAN_DECAY, LearnerParameter.DEFAULT_ADAM_MEAN_DECAY);
    double m_adamVarDecay = lp.getDouble(LearnerParameter.ADAM_VAR_DECAY, LearnerParameter.DEFAULT_ADAM_VAR_DECAY);
    double m_adadeltaRho = lp.getDouble(LearnerParameter.ADADELTA_RHO, LearnerParameter.DEFAULT_ADADELTA_RHO);
    double m_rmsDecay = lp.getDouble(LearnerParameter.RMS_DECAY, LearnerParameter.DEFAULT_RMS_DECAY);
    double m_momentum = lp.getDouble(LearnerParameter.MOMENTUM, LearnerParameter.DEFAULT_MOMENTUM);
    // int parameters
    // NOTE(review): "Trails" is presumably a typo for "Trials" kept for compatibility with the settings key.
    Integer m_distributionBinomialTrails = lp.getInteger(LearnerParameter.DISTRIBUTION_BINOMIAL_TRAILS, LearnerParameter.DEFAULT_INT);
    Integer m_seed = lp.getInteger(LearnerParameter.SEED, LearnerParameter.DEFAULT_INT);
    Integer m_iterations =
        lp.getInteger(LearnerParameter.TRAINING_ITERATIONS, LearnerParameter.DEFAULT_INT);
    Integer m_maxNumLineSearchIterations = lp.getInteger(LearnerParameter.MAX_NUMBER_LINE_SEARCH_ITERATIONS, LearnerParameter.DEFAULT_INT);
    // string-backed parameters, converted to their DL4J enum/map representations
    WeightInit m_weightInit = WeightInit
        .valueOf(lp.getString(LearnerParameter.GLOBAL_WEIGHT_INIT, LearnerParameter.DEFAULT_WEIGHT_INIT));
    DL4JDistribution m_distribution = DL4JDistribution
        .valueOf(lp.getString(LearnerParameter.DISTRIBUTION, LearnerParameter.DEFAULT_DISTRIBUTION));
    GradientNormalization m_gradientNormalization = DL4JGradientNormalization
        .fromToString(lp.getString(LearnerParameter.GRADIENT_NORMALIZATION, LearnerParameter.DEFAULT_GRADIENT_NORMALIZATION))
        .getDL4JValue();
    Updater m_updater = Updater
        .valueOf(lp.getString(LearnerParameter.UPDATER, LearnerParameter.DEFAULT_UPDATER));
    Map<Integer, Double> m_momentumAfter = ParameterUtils
        .convertStringToMap(lp.getString(LearnerParameter.MOMENTUM_AFTER, LearnerParameter.DEFAULT_MAP));
    OptimizationAlgorithm m_optimization = DL4JOptimizationAlgorithm.fromToString(
        lp.getString(LearnerParameter.OPTIMIZATION_ALGORITHM, LearnerParameter.DEFAULT_OPTIMIZATION))
        .getDL4JValue();
    final NeuralNetConfiguration.Builder nnConfigBuilder = new NeuralNetConfiguration.Builder();
    /*
     * Since 0.9.1: Use DL4J workspaces which makes training faster and we can reduce the number of GC calls of DL4J.
     * For now use WorkspaceMode.SEPARATE for both training and inference.
     */
    nnConfigBuilder.inferenceWorkspaceMode(WorkspaceMode.SEPARATE);
    nnConfigBuilder.trainingWorkspaceMode(WorkspaceMode.SEPARATE);
    /*
     * Need to overwrite global parameters for each layer separately as setting the parameter
     * in the NeuralNetConfiguration does not overwrite it when it was already set in the
     * specific layer. Hence, need to clone layers to not alter original layers.
     */
    final List<Layer> layersCopy = DLModelPortObjectUtils.cloneLayers(layers);
    if (m_useGlobalDropOut) {
        overwriteDropOut(layersCopy, m_dropOut);
        nnConfigBuilder.dropOut(m_dropOut);
    }
    if (m_useGlobalWeightInit) {
        overwriteWeightInit(layersCopy, m_weightInit);
        nnConfigBuilder.weightInit(m_weightInit);
        // DISTRIBUTION weight init needs an explicit Distribution instance built from the dialog settings.
        if (m_weightInit.equals(ParameterUtils.DISTRIBUTION_PARAMETER_CONDITION)) {
            Distribution dist;
            switch (m_distribution) {
                case BINOMIAL:
                    dist = new BinomialDistribution(m_distributionBinomialTrails, m_distributionBinomialProbability);
                    break;
                case NORMAL:
                    dist = new NormalDistribution(m_distributionMean, m_distributionSTD);
                    break;
                case UNIFORM:
                    dist = new UniformDistribution(m_distributionLowerBound, m_distributionUpperBound);
                    break;
                default:
                    throw new IllegalArgumentException("No case defined for DL4JDistribution: " + m_distribution);
            }
            nnConfigBuilder.dist(dist);
        }
    }
    if (m_useGlobalLearningRate) {
        overwriteLearningRate(layersCopy, m_learningRate);
    }
    //setup number of input and output neurons
    //ConfigurationUtils.setupLayers(layersCopy, m_nIn); //substituted with 'setInputType(InputType.feedForward(m_nIn)'
    if (m_useSeed) {
        nnConfigBuilder.seed(m_seed);
    }
    if (m_useGradientNormalization) {
        nnConfigBuilder.gradientNormalization(m_gradientNormalization);
        nnConfigBuilder.gradientNormalizationThreshold(m_gradientNormalizationThreshold);
    }
    if (m_useRegularization) {
        nnConfigBuilder.regularization(true);
        nnConfigBuilder.l1(m_l1);
        nnConfigBuilder.l2(m_l2);
    }
    // momentum moved to updaters, will only be used with NESTEROVS
    // if (m_useMomentum) {
    //     nnConfigBuilder.momentum(m_momentum);
    //     nnConfigBuilder.momentumAfter(m_momentumAfter);
    // }
    if (m_useDropConnect) {
        nnConfigBuilder.useDropConnect(true);
    }
    // the learning rate policy behaves strangely, need to revise conditions if we want to add it
    // if (m_useAdvancedLearningRate) {
    //     nnConfigBuilder.learningRateDecayPolicy(m_lrPolicy);
    //     nnConfigBuilder.lrPolicyDecayRate(m_lrPolicyDecayRate);
    //     if (m_lrPolicy.equals(ParameterUtils.LR_POWER_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.lrPolicyPower(m_lrPolicyPower);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_SCHEDULE_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.learningRateSchedule(m_learningRateAfter);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_SCORE_BASED_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.learningRateScoreBasedDecayRate(m_lrPolicyScoreDecayRate);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_STEPS_PARAMETER_CONDITION)) {
    //         nnConfigBuilder.lrPolicySteps(m_lrPolicySteps);
    //     } else if (m_lrPolicy.equals(ParameterUtils.LR_EXPONENTIAL_PARAMETER_CONDITION)) {
    //         //no extra param
    //     }
    // }
    if (m_useBiasInit) {
        nnConfigBuilder.biasInit(m_biasInit);
    }
    if (m_useBiasLearningRate) {
        overwriteBiasLearningRate(layersCopy, m_biasLearningRate);
    }
    nnConfigBuilder.iterations(m_iterations);
    nnConfigBuilder.updater(m_updater);
    // Each updater only accepts its own hyper-parameters, so branch on the configured updater.
    if (m_updater.equals(ParameterUtils.ADAM_PARAMETER_CONDITION)) {
        nnConfigBuilder.adamMeanDecay(m_adamMeanDecay);
        nnConfigBuilder.adamVarDecay(m_adamVarDecay);
    } else if (m_updater.equals(ParameterUtils.ADADELTA_PARAMETER_CONDITION)) {
        nnConfigBuilder.rho(m_adadeltaRho);
    } else if (m_updater.equals(ParameterUtils.RMSPROP_PARAMETER_CONDITION)) {
        nnConfigBuilder.rmsDecay(m_rmsDecay);
    } else if (m_updater.equals(ParameterUtils.NESTEROVS_PARAMETER_CONDITION)) {
        nnConfigBuilder.momentum(m_momentum);
        nnConfigBuilder.momentumAfter(m_momentumAfter);
    }
    // The new dialogs have a use updater checkbox which is not present in the old dialogs and is true by default. Therefore,
    // we overwrite the updater with the default values, if the checkbox is not checked. For the old dialog this will not happen
    // as the value is true by default.
    if (!m_useUpdater) {
        nnConfigBuilder.updater(Updater.valueOf(LearnerParameter.DEFAULT_UPDATER));
        nnConfigBuilder.momentum(LearnerParameter.DEFAULT_MOMENTUM);
    }
    nnConfigBuilder.optimizationAlgo(m_optimization);
    if (ParameterUtils.MAX_LINE_SEARCH_ITERATIONS_CONDITION.contains(m_optimization)) {
        nnConfigBuilder.maxNumLineSearchIterations(m_maxNumLineSearchIterations);
    }
    // very strange dl4j behaviour, for unsupervised layers (RBM, Autoencoder) we need
    // to modify the step function otherwise learning does not work (error does not decrease)
    //nnConfigBuilder.stepFunction(new DefaultStepFunction());
    final NeuralNetConfiguration.ListBuilder listBuilder = nnConfigBuilder.list();
    // Add the (possibly overwritten) layer copies in their original order.
    int currentLayerIndex = 0;
    for (final Layer layer : layersCopy) {
        listBuilder.layer(currentLayerIndex, layer);
        currentLayerIndex++;
    }
    listBuilder.pretrain(m_usePretrain);
    listBuilder.backprop(m_useBackprop);
    //infer correct number of inputs and outputs for each layer by using the number of inputs of the first layer
    //and the number of outputs for the following ones
    listBuilder.setInputType(InputType.feedForward(m_nIn));
    return listBuilder;
}