Example usage for org.apache.commons.math3.optim.nonlinear.scalar.gradient.BoundedNonLinearConjugateGradientOptimizer (constructor)

Introduction

On this page you can find example usage for the org.apache.commons.math3.optim.nonlinear.scalar.gradient.BoundedNonLinearConjugateGradientOptimizer constructor.

Prototype

public BoundedNonLinearConjugateGradientOptimizer(final Formula updateFormula,
        ConvergenceChecker<PointValuePair> checker) 

Document

Constructor with the default BrentSolver line search solver and IdentityPreconditioner preconditioner.
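
A minimal construction sketch, assuming Formula is the Commons Math NonLinearConjugateGradientOptimizer.Formula enum (as suggested by the usage below); the tolerance values are illustrative only:

BoundedNonLinearConjugateGradientOptimizer optimizer = new BoundedNonLinearConjugateGradientOptimizer(
        NonLinearConjugateGradientOptimizer.Formula.POLAK_RIBIERE, // update formula for the search direction
        new SimpleValueChecker(1e-6, 1e-10)); // relative/absolute convergence thresholds (illustrative)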

Usage

From source file: gdsc.smlm.fitting.nonlinear.MaximumLikelihoodFitter.java

public FitStatus fit(int n, double[] y, double[] y_fit, double[] a, double[] a_dev, double[] error,
        double noise) {
    numberOfFittedPoints = n;

    LikelihoodWrapper maximumLikelihoodFunction;

    // We can use different likelihood wrapper functions:
    switch (likelihoodFunction) {
    case POISSON_GAMMA_GAUSSIAN:
        // Poisson-Gamma-Gaussian - EM-CCD data
        if (alpha > 0 && sigma > 0) {
            maximumLikelihoodFunction = new PoissonGammaGaussianLikelihoodWrapper(f, a, y, n, alpha, sigma);
            break;
        }
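        // Deliberate fall-through: if alpha/sigma are not valid, use the next model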

    case POISSON_GAUSSIAN:
        // Poisson-Gaussian - CCD data
        if (sigma > 0) {
            maximumLikelihoodFunction = new PoissonGaussianLikelihoodWrapper(f, a, y, n, sigma);
            break;
        }
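        // Deliberate fall-through: if sigma is not valid, use the Poisson model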

    case POISSON:
    default:
        // Poisson - most counting data
        maximumLikelihoodFunction = new PoissonLikelihoodWrapper(f, a, y, n);
    }

    // If the search method requires the gradient but the current function cannot compute
    // one, fall back to the Poisson likelihood (which can)
    if (searchMethod.usesGradient && !maximumLikelihoodFunction.canComputeGradient()) {
        maximumLikelihoodFunction = new PoissonLikelihoodWrapper(f, a, y, n);
    }

    try {
        double[] startPoint = getInitialSolution(a);

        PointValuePair optimum = null;
        if (searchMethod == SearchMethod.POWELL || searchMethod == SearchMethod.POWELL_BOUNDED) {
            // Non-differentiable version using Powell Optimiser

            // This is as per the method in Numerical Recipes 10.5 (Direction Set (Powell's) method)
            // I could extend the optimiser and implement bounds on the directions moved. However the mapping
            // adapter seems to work OK.

            final boolean basisConvergence = false;

            // Perhaps these thresholds should be tighter?
            // The default is to use the sqrt() of the overall tolerance
            //final double lineRel = FastMath.sqrt(relativeThreshold);
            //final double lineAbs = FastMath.sqrt(absoluteThreshold);
            //final double lineRel = relativeThreshold * 1e2;
            //final double lineAbs = absoluteThreshold * 1e2;

            // Since we are fitting only a small number of parameters, just use the same
            // tolerance for each search direction
            final double lineRel = relativeThreshold;
            final double lineAbs = absoluteThreshold;

            CustomPowellOptimizer o = new CustomPowellOptimizer(relativeThreshold, absoluteThreshold, lineRel,
                    lineAbs, null, basisConvergence);

            OptimizationData maxIterationData = null;
            if (getMaxIterations() > 0)
                maxIterationData = new MaxIter(getMaxIterations());

            if (searchMethod == SearchMethod.POWELL) {
                if (powellFunction == null) {
                    // We must map all the parameters into the same range. This is done in the Mortensen MLE 
                    // Python code by using the sqrt of the number of photons and background.
                    if (mapGaussian) {
                        Gaussian2DFunction gf = (Gaussian2DFunction) f;
                        // Re-map signal and background using the sqrt
                        int[] indices = gf.gradientIndices();
                        int[] map = new int[indices.length];
                        int count = 0;
                        // Background is always first
                        if (indices[0] == Gaussian2DFunction.BACKGROUND) {
                            map[count++] = 0;
                        }
                        // Look for the Signal in multiple peak 2D Gaussians
                        for (int i = 1; i < indices.length; i++)
                            if (indices[i] % 6 == Gaussian2DFunction.SIGNAL) {
                                map[count++] = i;
                            }
                        if (count > 0) {
                            powellFunction = new MappedMultivariateLikelihood(maximumLikelihoodFunction,
                                    Arrays.copyOf(map, count));
                        }
                    }
                    if (powellFunction == null) {
                        powellFunction = new MultivariateLikelihood(maximumLikelihoodFunction);
                    }
                }

                // Update the maximum likelihood function in the Powell function wrapper
                powellFunction.fun = maximumLikelihoodFunction;

                OptimizationData positionChecker = null;
                // new org.apache.commons.math3.optim.PositionChecker(relativeThreshold, absoluteThreshold);
                if (powellFunction.isMapped()) {
                    MappedMultivariateLikelihood adapter = (MappedMultivariateLikelihood) powellFunction;
                    optimum = o.optimize(maxIterationData, new MaxEval(getMaxEvaluations()),
                            new ObjectiveFunction(powellFunction), GoalType.MINIMIZE,
                            new InitialGuess(adapter.map(startPoint)), positionChecker);
                    double[] solution = adapter.unmap(optimum.getPointRef());
                    optimum = new PointValuePair(solution, optimum.getValue());
                } else {
                    optimum = o.optimize(maxIterationData, new MaxEval(getMaxEvaluations()),
                            new ObjectiveFunction(powellFunction), GoalType.MINIMIZE,
                            new InitialGuess(startPoint), positionChecker);
                }
            } else {
                // Try using the mapping adapter for a bounded Powell search
                MultivariateFunctionMappingAdapter adapter = new MultivariateFunctionMappingAdapter(
                        new MultivariateLikelihood(maximumLikelihoodFunction), lower, upper);
                optimum = o.optimize(maxIterationData, new MaxEval(getMaxEvaluations()),
                        new ObjectiveFunction(adapter), GoalType.MINIMIZE,
                        new InitialGuess(adapter.boundedToUnbounded(startPoint)));
                double[] solution = adapter.unboundedToBounded(optimum.getPointRef());
                optimum = new PointValuePair(solution, optimum.getValue());
            }
            iterations = o.getIterations();
            evaluations = o.getEvaluations();
        } else if (searchMethod == SearchMethod.BOBYQA) {
            // Derivative-free optimisation using Powell's BOBYQA (Bound Optimization BY Quadratic
            // Approximation) algorithm.
            // This is slower than the Powell optimiser and requires a high number of evaluations.
            int numberOfInterpolationPoints = this.getNumberOfFittedParameters() + 2;

            BOBYQAOptimizer o = new BOBYQAOptimizer(numberOfInterpolationPoints);
            optimum = o.optimize(new MaxEval(getMaxEvaluations()),
                    new ObjectiveFunction(new MultivariateLikelihood(maximumLikelihoodFunction)),
                    GoalType.MINIMIZE, new InitialGuess(startPoint), new SimpleBounds(lower, upper));
            iterations = o.getIterations();
            evaluations = o.getEvaluations();
        } else if (searchMethod == SearchMethod.CMAES) {
            // TODO - Understand why the CMAES optimiser does not fit very well on test data. It appears 
            // to converge too early and the likelihood scores are not as low as the other optimisers.

            // CMAESOptimiser based on Matlab code:
            // https://www.lri.fr/~hansen/cmaes.m
            // Take the defaults from the Matlab documentation
            double stopFitness = 0; //Double.NEGATIVE_INFINITY;
            boolean isActiveCMA = true;
            int diagonalOnly = 0;
            int checkFeasableCount = 1;
            RandomGenerator random = new Well19937c();
            boolean generateStatistics = false;
            // The sigma determines the search range for the variables. It should be 1/3 of the initial search region.
            double[] sigma = new double[lower.length];
            for (int i = 0; i < sigma.length; i++)
                sigma[i] = (upper[i] - lower[i]) / 3;
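            // 4 + floor(3 ln N) is the standard CMA-ES default population size (lambda)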
            int popSize = (int) (4 + Math.floor(3 * Math.log(sigma.length)));

            // The CMAES optimiser is random and restarting can overcome problems with quick convergence.
            // The Apache Commons documentation states that convergence should occur between 30N and
            // 300N^2 function evaluations
            final int n30 = FastMath.min(sigma.length * sigma.length * 30, getMaxEvaluations() / 2);
            evaluations = 0;
            OptimizationData[] data = new OptimizationData[] { new InitialGuess(startPoint),
                    new CMAESOptimizer.PopulationSize(popSize), new MaxEval(getMaxEvaluations()),
                    new CMAESOptimizer.Sigma(sigma),
                    new ObjectiveFunction(new MultivariateLikelihood(maximumLikelihoodFunction)),
                    GoalType.MINIMIZE, new SimpleBounds(lower, upper) };
            // Iterate to prevent early convergence
            int repeat = 0;
            while (evaluations < n30) {
                if (repeat++ > 1) {
                    // Update the start point and population size
                    data[0] = new InitialGuess(optimum.getPointRef());
                    popSize *= 2;
                    data[1] = new CMAESOptimizer.PopulationSize(popSize);
                }
                CMAESOptimizer o = new CMAESOptimizer(getMaxIterations(), stopFitness, isActiveCMA,
                        diagonalOnly, checkFeasableCount, random, generateStatistics,
                        new SimpleValueChecker(relativeThreshold, absoluteThreshold));
                PointValuePair result = o.optimize(data);
                iterations += o.getIterations();
                evaluations += o.getEvaluations();
                //System.out.printf("CMAES [%d] i=%d [%d], e=%d [%d]\n", repeat, o.getIterations(), iterations,
                //      o.getEvaluations(), totalEvaluations);
                if (optimum == null || result.getValue() < optimum.getValue()) {
                    optimum = result;
                }
            }
        } else if (searchMethod == SearchMethod.BFGS) {
            // BFGS can use an approximate line search minimisation whereas Powell and conjugate gradient
            // methods require a more accurate line minimisation. The BFGS search does not do a full
            // minimisation but takes appropriate steps in the direction of the current gradient.

            // Do not use the convergence checker on the value of the function. Use the convergence on the 
            // point coordinate and gradient
            //BFGSOptimizer o = new BFGSOptimizer(new SimpleValueChecker(rel, abs));
            BFGSOptimizer o = new BFGSOptimizer();

            // Configure maximum step length for each dimension using the bounds
            double[] stepLength = new double[lower.length];
            for (int i = 0; i < stepLength.length; i++) {
                stepLength[i] = (upper[i] - lower[i]) * 0.3333333;
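                // A non-positive range means this dimension is effectively unbounded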
                if (stepLength[i] <= 0)
                    stepLength[i] = Double.POSITIVE_INFINITY;
            }

            // The GoalType is always minimise so no need to pass this in
            OptimizationData positionChecker = null;
            //new org.apache.commons.math3.optim.PositionChecker(relativeThreshold, absoluteThreshold);
            optimum = o.optimize(new MaxEval(getMaxEvaluations()),
                    new ObjectiveFunctionGradient(new MultivariateVectorLikelihood(maximumLikelihoodFunction)),
                    new ObjectiveFunction(new MultivariateLikelihood(maximumLikelihoodFunction)),
                    new InitialGuess(startPoint), new SimpleBounds(lowerConstraint, upperConstraint),
                    new BFGSOptimizer.GradientTolerance(relativeThreshold), positionChecker,
                    new BFGSOptimizer.StepLength(stepLength));
            iterations = o.getIterations();
            evaluations = o.getEvaluations();
        } else {
            // The line search algorithm often fails. This is due to searching into a region where the
            // function evaluates to a negative value and so has been clipped. This means the upper bound
            // of the line cannot be found.
            // Note that when run on an easy problem (200 photons with fixed fitting (no background)) the
            // algorithm sometimes produces results better than the Powell algorithm, but it is slower.

            BoundedNonLinearConjugateGradientOptimizer o = new BoundedNonLinearConjugateGradientOptimizer(
                    (searchMethod == SearchMethod.CONJUGATE_GRADIENT_FR) ? Formula.FLETCHER_REEVES
                            : Formula.POLAK_RIBIERE,
                    new SimpleValueChecker(relativeThreshold, absoluteThreshold));

            // Note: The gradients may become unstable at the edge of the bounds. Or they will not change
            // direction if the true solution is on the bounds, since the gradient will always continue
            // towards the bounds. This is key to the conjugate gradient method: it searches along a vector
            // until the direction of the gradient is in the opposite direction (using the dot product,
            // i.e. the cosine of the angle between them)

            // NR 10.7 states there is no advantage of the variable metric DFP or BFGS methods over
            // conjugate gradient methods. So I will try these first.

            // Try this:
            // Adapt the conjugate gradient optimiser to use the gradient to pick the search direction
            // and then for the line minimisation. However if the function is out of bounds then clip the 
            // variables at the bounds and continue. 
            // If the current point is at the bounds and the gradient is to continue out of bounds then 
            // clip the gradient too.

            // Or: just use the gradient for the search direction then use the line minimisation/rest
            // as per the Powell optimiser. The bounds should limit the search.

            // I tried a bounded conjugate gradient optimiser with clipped variables:
            // This sometimes works. However when the variables go a long way out of the expected range
            // the gradients can have vastly different magnitudes. This results in the algorithm stalling
            // since the gradients can be close to zero and some of the parameters are no longer adjusted.
            // Perhaps this condition could be detected so that the algorithm gives up and resorts to a
            // Powell optimiser from the current point.

            // Changed the bracketing step to very small (default is 1, changed to 0.001). This improves the 
            // performance. The gradient direction is very sensitive to small changes in the coordinates so a 
            // tighter bracketing of the line search helps.

            // Tried using a non-gradient method for the line search copied from the Powell optimiser:
            // This also works when the bracketing step is small but the number of iterations is higher.

            // 24.10.2014: I have tried to get conjugate gradient to work but the gradient function
            // does not appear to behave suitably for the optimiser. In the current state both methods of
            // using a Bounded Conjugate Gradient Optimiser perform poorly relative to other optimisers:
            // Simulated : n=1000, signal=200, x=0.53, y=0.47
            // LVM : n=1000, signal=171, x=0.537, y=0.471 (1.003s)
            // Powell : n=1000, signal=187, x=0.537, y=0.48 (1.238s)
            // Gradient based PR (constrained): n=858, signal=161, x=0.533, y=0.474 (2.54s)
            // Gradient based PR (bounded): n=948, signal=161, x=0.533, y=0.473 (2.67s)
            // Non-gradient based : n=1000, signal=151.47, x=0.535, y=0.474 (1.626s)
            // The conjugate gradient optimisers are slower, under-predict the signal the most and, in the
            // case of the gradient-based optimiser, fail to converge on some problems. This is worse when
            // constrained fitting is used rather than tightly bounded fitting.
            // I will leave the code in as an option but would not recommend using it. I may remove it in
            // the future.

            // Note: It is strange that the non-gradient based line minimisation is more successful.
            // It may be that the gradient function is not accurate (due to round-off error) or that it is
            // simply wrong when far from the optimum. My JUnit tests only evaluate the function within the
            // expected range of the answer.

            // Note the default step size on the Powell optimiser is 1 but the initial directions are unit
            // vectors. So our bracketing step should be at most 1 / length of the first gradient vector to
            // prevent the first step being too large when bracketing.
            final double[] gradient = new double[startPoint.length];
            maximumLikelihoodFunction.likelihood(startPoint, gradient);
            double l = 0;
            for (double d : gradient)
                l += d * d;
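            // Note: l is the squared length of the first gradient (no sqrt is taken)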
            final double bracketingStep = FastMath.min(0.001, ((l > 1) ? 1.0 / l : 1));
            //System.out.printf("Bracketing step = %f (length=%f)\n", bracketingStep, l);

            o.setUseGradientLineSearch(gradientLineMinimisation);

            optimum = o.optimize(new MaxEval(getMaxEvaluations()),
                    new ObjectiveFunctionGradient(new MultivariateVectorLikelihood(maximumLikelihoodFunction)),
                    new ObjectiveFunction(new MultivariateLikelihood(maximumLikelihoodFunction)),
                    GoalType.MINIMIZE, new InitialGuess(startPoint),
                    new SimpleBounds(lowerConstraint, upperConstraint),
                    new BoundedNonLinearConjugateGradientOptimizer.BracketingStep(bracketingStep));
            iterations = o.getIterations();
            evaluations = o.getEvaluations();

            //maximumLikelihoodFunction.value(solution, gradient);
            //System.out.printf("Iter = %d, %g @ %s : %s\n", iterations, ll, Arrays.toString(solution),
            //      Arrays.toString(gradient));
        }

        final double[] solution = optimum.getPointRef();

        setSolution(a, solution);

        //System.out.printf("Iter = %d, Eval = %d, %g @ %s\n", iterations, evaluations, optimum.getValue(), 
        //   java.util.Arrays.toString(solution));

        // Compute residuals for the FunctionSolver interface
        if (y_fit == null || y_fit.length < n)
            y_fit = new double[n];
        f.initialise(a);
        residualSumOfSquares = 0;
        for (int i = 0; i < n; i++) {
            y_fit[i] = f.eval(i);
            final double residual = y[i] - y_fit[i];
            residualSumOfSquares += residual * residual;
        }

        if (a_dev != null) {
            // Assume the Maximum Likelihood estimator returns the optimum fit (achieves the Cramér-Rao
            // lower bound) and so the covariance can be obtained from the Fisher Information Matrix.
            final int[] gradientIndices = f.gradientIndices();
            final int nparams = gradientIndices.length;
            GradientCalculator calculator = GradientCalculatorFactory.newCalculator(nparams);
            final double[] I = calculator.fisherInformationDiagonal(n, a, f);
            for (int i = 0; i < gradientIndices.length; i++)
                a_dev[gradientIndices[i]] = 1.0 / Math.sqrt(I[i]);
        }

        error[0] = NonLinearFit.getError(residualSumOfSquares, noise, n, f.gradientIndices().length);
        totalSumOfSquares = getSumOfSquares(n, y);
    } catch (TooManyIterationsException e) {
        //System.out.printf("Too many iterations = %d\n", e.getMax());
        //e.printStackTrace();
        return FitStatus.FAILED_TO_CONVERGE;
    } catch (TooManyEvaluationsException e) {
        //System.out.printf("Too many evaluations = %d\n", e.getMax());
        //e.printStackTrace();
        return FitStatus.FAILED_TO_CONVERGE;
    } catch (ConvergenceException e) {
        // Occurs when QR decomposition fails - mark as a singular non-linear model (no solution)
        //System.out.printf("Singular non linear model = %s\n", e.getMessage());
        return FitStatus.SINGULAR_NON_LINEAR_MODEL;
    } catch (BFGSOptimizer.LineSearchRoundoffException e) {
        //System.out.println("BFGS error: " + e.getMessage());
        //e.printStackTrace();
        return FitStatus.FAILED_TO_CONVERGE;
    } catch (Exception e) {
        //System.out.printf("Unknown error = %s\n", e.getMessage());
        e.printStackTrace();
        return FitStatus.UNKNOWN;
    }

    return FitStatus.OK;
}
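
A condensed, self-contained sketch of the conjugate gradient path above, with a stand-in quadratic objective replacing the likelihood wrapper. The bounds, tolerances, bracketing step and class name BoundedCGExample are illustrative assumptions; the optimize(...) argument pattern mirrors the source example.

import org.apache.commons.math3.analysis.MultivariateFunction;
import org.apache.commons.math3.analysis.MultivariateVectorFunction;
import org.apache.commons.math3.optim.InitialGuess;
import org.apache.commons.math3.optim.MaxEval;
import org.apache.commons.math3.optim.PointValuePair;
import org.apache.commons.math3.optim.SimpleBounds;
import org.apache.commons.math3.optim.SimpleValueChecker;
import org.apache.commons.math3.optim.nonlinear.scalar.GoalType;
import org.apache.commons.math3.optim.nonlinear.scalar.ObjectiveFunction;
import org.apache.commons.math3.optim.nonlinear.scalar.ObjectiveFunctionGradient;
// Package per this page's header
import org.apache.commons.math3.optim.nonlinear.scalar.gradient.BoundedNonLinearConjugateGradientOptimizer;
import org.apache.commons.math3.optim.nonlinear.scalar.gradient.NonLinearConjugateGradientOptimizer.Formula;

public class BoundedCGExample {
    public static void main(String[] args) {
        // Stand-in objective: f(x) = (x0 - 1)^2 + (x1 - 2)^2, minimum at (1, 2)
        MultivariateFunction f = p -> (p[0] - 1) * (p[0] - 1) + (p[1] - 2) * (p[1] - 2);
        // Analytical gradient of f
        MultivariateVectorFunction g = p -> new double[] { 2 * (p[0] - 1), 2 * (p[1] - 2) };

        BoundedNonLinearConjugateGradientOptimizer o = new BoundedNonLinearConjugateGradientOptimizer(
                Formula.POLAK_RIBIERE, new SimpleValueChecker(1e-6, 1e-10));

        // Same argument pattern as the source example above
        PointValuePair optimum = o.optimize(new MaxEval(10000),
                new ObjectiveFunctionGradient(g), new ObjectiveFunction(f), GoalType.MINIMIZE,
                new InitialGuess(new double[] { 0, 0 }),
                new SimpleBounds(new double[] { -5, -5 }, new double[] { 5, 5 }),
                new BoundedNonLinearConjugateGradientOptimizer.BracketingStep(0.001));

        System.out.printf("Minimum at (%.3f, %.3f), value = %.3g%n", optimum.getPoint()[0],
                optimum.getPoint()[1], optimum.getValue());
    }
}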