Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package nl.systemsgenetics.cellTypeSpecificAlleleSpecificExpression; import java.util.ArrayList; import org.apache.commons.math3.exception.TooManyEvaluationsException; import org.apache.commons.math3.optim.InitialGuess; import org.apache.commons.math3.optim.MaxEval; import org.apache.commons.math3.optim.PointValuePair; import org.apache.commons.math3.optim.nonlinear.scalar.GoalType; import org.apache.commons.math3.optim.nonlinear.scalar.ObjectiveFunction; import org.apache.commons.math3.optim.nonlinear.scalar.noderiv.NelderMeadSimplex; import org.apache.commons.math3.optim.nonlinear.scalar.noderiv.SimplexOptimizer; import org.apache.commons.math3.optim.univariate.SearchInterval; /** * * @author adriaan */ class CTSbinomialTest { //SNP information private final String snpName; private final String chromosome; private final String position; //Information about the input data private final int numberOfHets; //Names of individuals for easy reference private ArrayList<String> hetSampleNames; //The ArrayLists below are for //Data that will be actually used //In calculation. private ArrayList<Integer> asRef; private ArrayList<Integer> asAlt; private ArrayList<Integer> asNo; private ArrayList<Double> cellProp; //Specific parameters for the cell type specific test double MLEratioCellType; double MLEratioResidual; //standard test output: int iterations; double nullLogLik; double altLogLik; double pVal; double chiSq; //some settings boolean outPutAllData = false; boolean testPerformed = false; boolean testConverged = false; public CTSbinomialTest(ArrayList<IndividualSnpData> all_individuals) throws Exception { //basic information, get the zero instance. snpName = all_individuals.get(0).getSnpName(); chromosome = all_individuals.get(0).getChromosome(); position = all_individuals.get(0).getPosition(); ArrayList<IndividualSnpData> het_individuals = UtilityMethods .isolateHeterozygotesFromIndividualSnpData(all_individuals); numberOfHets = het_individuals.size(); hetSampleNames = new ArrayList<String>(); asRef = new ArrayList<Integer>(); asAlt = new ArrayList<Integer>(); asNo = new ArrayList<Integer>(); cellProp = new ArrayList<Double>(); int total_overlap = 0; int ref_total = 0; for (IndividualSnpData temp_het : het_individuals) { //Do nothing if there is no data in het_individuals hetSampleNames.add(temp_het.getSampleName()); asRef.add(temp_het.getRefNum()); asAlt.add(temp_het.getAltNum()); asNo.add(temp_het.getNoNum()); cellProp.add(temp_het.getCellTypeProp()); //this is used to check if we will continue with calculations. total_overlap += temp_het.getRefNum() + temp_het.getAltNum(); ref_total += temp_het.getRefNum(); } if ((total_overlap >= GlobalVariables.minReads) && (numberOfHets >= GlobalVariables.minHets)) { Integer[] asRefArray = asRef.toArray(new Integer[asRef.size()]); Integer[] asAltArray = asAlt.toArray(new Integer[asAlt.size()]); Double[] cellPropArray = cellProp.toArray(new Double[cellProp.size()]); // Just using the alternative likelihood of the binomial test here. // As out null loglikelihood. try { CTSnullBinomialLikelihood CTSbinomNull = new CTSnullBinomialLikelihood(asRefArray, asAltArray, cellPropArray); double nullProp = ((double) ref_total / (double) total_overlap); double[] nullInput = { nullProp }; nullLogLik = CTSbinomNull.value(nullInput); if (GlobalVariables.verbosity >= 100) { System.out.println("LogLik of NULL"); System.out.println("\tResidual ratio: " + Double.toString(nullInput[0])); System.out.println("\tnullLogLik: " + Double.toString(nullLogLik)); } // Now do MLE of the alternative test // more computationally intensive // than the previous one. CTSaltBinomialLikelihood CTSbinom = new CTSaltBinomialLikelihood(asRefArray, asAltArray, cellPropArray); NelderMeadSimplex simplex; simplex = new NelderMeadSimplex(2, 1.0, 1.0, 2.0, 0.25, 0.25); SimplexOptimizer optimizer = new SimplexOptimizer(GlobalVariables.simplexThreshold, GlobalVariables.simplexThreshold); PointValuePair solutionAlt = optimizer.optimize(new ObjectiveFunction(CTSbinom), new MaxEval(GlobalVariables.maximumIterations), simplex, GoalType.MINIMIZE, new InitialGuess(new double[] { 0, nullInput[0] }), new SearchInterval(0.0, 1.0)); double[] valueAlt = solutionAlt.getPoint(); /* In simulations it was found that in about 30% of the cases A solution was not found, so we do it again if there */ if ((optimizer.getIterations() <= 5)) { if (GlobalVariables.verbosity >= 100) { System.out.println("\nfirst starting point was already in a minimum."); System.out.println("Trying with other starting values."); } ArrayList<double[]> StartingValueList = new ArrayList<double[]>(); //These are the starting values to try StartingValueList.add(new double[] { 0.5, 0.0 }); StartingValueList.add(new double[] { 0.5, 0.5 }); StartingValueList.add(new double[] { 0.0, 0.0 }); StartingValueList.add(new double[] { 0.75, 0.0 }); StartingValueList.add(new double[] { 0.0, 0.75 }); for (double[] startingValue : StartingValueList) { SimplexOptimizer newOptimizer = new SimplexOptimizer(GlobalVariables.simplexThreshold, GlobalVariables.simplexThreshold); PointValuePair newSolutionAlt = newOptimizer.optimize(new ObjectiveFunction(CTSbinom), new MaxEval(GlobalVariables.maximumIterations), simplex, GoalType.MINIMIZE, new InitialGuess(startingValue), new SearchInterval(0.0, 1.0)); double[] newValueAlt = newSolutionAlt.getPoint(); if (CTSbinom.value(newValueAlt) < CTSbinom.value(valueAlt)) { if (GlobalVariables.verbosity >= 100 && newOptimizer.getIterations() >= 5) { System.out.println("New starting values are a better fit to the data"); System.out.println("keeping results of the new starting values.\n"); } //Asign the new values to the actual used ones. valueAlt = newValueAlt; optimizer = newOptimizer; break; } } } altLogLik = CTSbinom.value(valueAlt); iterations = optimizer.getIterations(); MLEratioCellType = valueAlt[1]; MLEratioResidual = valueAlt[0]; //chi squared statistic is determined based on both null and alt loglikelihoods. chiSq = LikelihoodFunctions.ChiSqFromLogLik(nullLogLik, altLogLik); //determine P value based on distribution pVal = LikelihoodFunctions.determinePvalFrom1DFchiSq(chiSq); if (GlobalVariables.verbosity >= 10) { System.out.println("\n--- Starting cell type specific binomial LRT test estimate ---"); System.out.println("LogLik of Alt converged to a threshold of " + Double.toString(GlobalVariables.simplexThreshold)); System.out.println("\tCelltype ratio: " + Double.toString(valueAlt[0])); System.out.println("\tResidual ratio: " + Double.toString(valueAlt[1])); System.out .println("\tIterations to converge: " + Integer.toString(iterations) + "\n"); System.out.println("\tNull log likelihood: " + Double.toString(nullLogLik)); System.out.println("\tAlt log likelihood: " + Double.toString(altLogLik) + "\n"); System.out.println("\tChisq statistic: " + Double.toString(chiSq)); System.out.println("\tP value: " + Double.toString(pVal)); System.out.println("--------------------------------------------------------------"); } testConverged = true; } catch (TooManyEvaluationsException e) { if (GlobalVariables.verbosity >= 1) { System.out.println("WARNING: Did not converge to a solution for SNP: " + snpName + "in cell type specific beta binomial"); System.out.println(" After " + Integer.toString(GlobalVariables.maximumIterations) + " iterations."); System.out.println(" Continue-ing with the next."); System.out.println("--------------------------------------------------------------"); } } // Add some extra values that will make sure that there could be some kind of non-convergence. testPerformed = true; } } public String writeTestStatistics(boolean all_data) { String out = ""; //snp info out += chromosome + "\t"; out += position + "\t"; out += snpName + "\t"; if (testPerformed && testConverged) { out += numberOfHets + "\t"; // lets have a look at how the decimals behave here, seems all right. // otherwise i think I have to do something with printf or something. out += Double.toString(pVal) + "\t"; out += Double.toString(chiSq) + "\t"; out += Double.toString(MLEratioCellType) + "\t"; out += Double.toString(MLEratioResidual) + "\t"; out += Double.toString(nullLogLik) + "\t"; out += Double.toString(altLogLik) + "\t"; if (outPutAllData) { String samples_string = ""; String ref_string = ""; String alt_string = ""; String no_string = ""; String disp_string = ""; for (int i = 0; i < hetSampleNames.size(); i++) { //samples_string += hetSampleNames.get(i) + ";"; ref_string += Integer.toString(asRef.get(i)) + ";"; alt_string += Integer.toString(asAlt.get(i)) + ";"; no_string += Integer.toString(asNo.get(i)) + ";"; disp_string += Double.toString(cellProp.get(i)) + ";"; } //remove last delimiter //samples_string = samples_string.substring(0, samples_string.length()-1); ref_string = ref_string.substring(0, ref_string.length() - 1); alt_string = alt_string.substring(0, alt_string.length() - 1); no_string = no_string.substring(0, no_string.length() - 1); disp_string = disp_string.substring(0, disp_string.length() - 1); //out += "\t" + samples_string + "\t" + ref_string + "\t" + alt_string + "\t" + no_string; out += "\t" + ref_string + "\t" + alt_string + "\t" + no_string + "\t" + disp_string; } } else { //when no testing is done, will only output snp name and position, and NA. //Make sure this is still correct. for (int i = 0; i < 6; i++) { out += "NA\t"; } if (outPutAllData) { for (int i = 0; i < 5; i++) { out += "NA\t"; } } out += "NA"; } return out; } public boolean isTestPerformed() { return testPerformed; } }