Usage examples for org.apache.commons.math3.stat.descriptive.moment.Mean#clear()
@Override public void clear()
From source file:com.sciaps.utils.Util.java
/**
 * Builds an averaged spectrum from a collection of shots by resampling them on a uniform grid.
 *
 * <p>The grid starts at the smallest lower bound of the shots' valid ranges and contains
 * {@code floor((max - min) * sampleRate)} points spaced {@code 1 / sampleRate} apart, where
 * {@code max} is the largest upper bound of the valid ranges. At each grid point the y value is
 * the mean of the intensity-function values of those shots whose valid range contains the
 * point. A grid point covered by no shot receives whatever an empty {@code Mean} reports.
 *
 * @param shots      the spectra to average
 * @param sampleRate number of grid points per unit of x
 * @return a new {@code RawDataSpectrum}; row 0 of its data holds the x values, row 1 the means
 */
public static Spectrum createAverage(Collection<? extends Spectrum> shots, double sampleRate) {
    // Find the overall x extent spanned by the valid ranges of all shots.
    Min lowerBound = new Min();
    Max upperBound = new Max();
    for (Spectrum shot : shots) {
        lowerBound.increment(shot.getValidRange().getMinimumDouble());
        upperBound.increment(shot.getValidRange().getMaximumDouble());
    }

    double start = lowerBound.getResult();
    double step = 1 / sampleRate;
    int numSamples = (int) Math.floor((upperBound.getResult() - start) * sampleRate);

    double[][] data = new double[2][numSamples];
    Mean pointMean = new Mean();
    for (int i = 0; i < numSamples; i++) {
        double x = start + i * step;

        // One accumulator is reused for every grid point, so reset it first.
        pointMean.clear();
        for (Spectrum shot : shots) {
            if (!shot.getValidRange().containsDouble(x)) {
                continue;
            }
            pointMean.increment(shot.getIntensityFunction().value(x));
        }

        data[0][i] = x;
        data[1][i] = pointMean.getResult();
    }

    return new RawDataSpectrum(data);
}
From source file:com.cloudera.oryx.app.serving.als.model.ALSServingModelTest.java
/**
 * Compares the top-N results of two models that hold identical user vectors, item vectors and
 * known items but are constructed with {@code 1.0} versus {@code 0.5} as the third constructor
 * argument (presumably the LSH sample rate; confirm against {@code ALSServingModel}).
 *
 * <p>For every user (scored with {@code DotsFunction}) and then for every item (scored with
 * {@code CosineAverageFunction}) the length of the common leading run of the two result lists
 * is measured. The mean run length must be at least 4.0 for users and at least 5.0 for items.
 */
@Test
public void testLSHEffect() {
    RandomGenerator random = RandomManager.getRandom();
    PoissonDistribution knownItemCountDist = new PoissonDistribution(random, 20,
            PoissonDistribution.DEFAULT_EPSILON, PoissonDistribution.DEFAULT_MAX_ITERATIONS);
    int features = 20;
    ALSServingModel mainModel = new ALSServingModel(features, true, 1.0, null);
    ALSServingModel lshModel = new ALSServingModel(features, true, 0.5, null);

    // Populate both models with the same random users and known items.
    int userItemCount = 20000;
    for (int user = 0; user < userItemCount; user++) {
        String userID = "U" + user;
        float[] userVector = VectorMath.randomVectorF(features, random);
        mainModel.setUserVector(userID, userVector);
        lshModel.setUserVector(userID, userVector);

        int knownCount = knownItemCountDist.sample();
        Collection<String> knownIDs = new ArrayList<>(knownCount);
        for (int k = 0; k < knownCount; k++) {
            knownIDs.add("I" + random.nextInt(userItemCount));
        }
        mainModel.addKnownItems(userID, knownIDs);
        lshModel.addKnownItems(userID, knownIDs);
    }

    // Populate both models with the same random item vectors.
    for (int item = 0; item < userItemCount; item++) {
        String itemID = "I" + item;
        float[] itemVector = VectorMath.randomVectorF(features, random);
        mainModel.setItemVector(itemID, itemVector);
        lshModel.setItemVector(itemID, itemVector);
    }

    int numRecs = 10;
    Mean meanMatchLength = new Mean();

    // Per-user comparison, scoring by dot product with the user vector.
    for (int user = 0; user < userItemCount; user++) {
        String userID = "U" + user;
        List<Pair<String, Double>> mainRecs = mainModel
                .topN(new DotsFunction(mainModel.getUserVector(userID)), null, numRecs, null)
                .collect(Collectors.toList());
        List<Pair<String, Double>> lshRecs = lshModel
                .topN(new DotsFunction(lshModel.getUserVector(userID)), null, numRecs, null)
                .collect(Collectors.toList());

        int limit = Math.min(lshRecs.size(), mainRecs.size());
        int matched = 0;
        while (matched < limit && lshRecs.get(matched).equals(mainRecs.get(matched))) {
            matched++;
        }
        meanMatchLength.increment(matched);
    }
    log.info("Mean matching prefix: {}", meanMatchLength.getResult());
    assertGreaterOrEqual(meanMatchLength.getResult(), 4.0);

    // Per-item comparison, scoring with CosineAverageFunction; the accumulator is reused.
    meanMatchLength.clear();
    for (int item = 0; item < userItemCount; item++) {
        String itemID = "I" + item;
        List<Pair<String, Double>> mainRecs = mainModel
                .topN(new CosineAverageFunction(mainModel.getItemVector(itemID)), null, numRecs, null)
                .collect(Collectors.toList());
        List<Pair<String, Double>> lshRecs = lshModel
                .topN(new CosineAverageFunction(lshModel.getItemVector(itemID)), null, numRecs, null)
                .collect(Collectors.toList());

        int limit = Math.min(lshRecs.size(), mainRecs.size());
        int matched = 0;
        while (matched < limit && lshRecs.get(matched).equals(mainRecs.get(matched))) {
            matched++;
        }
        meanMatchLength.increment(matched);
    }
    log.info("Mean matching prefix: {}", meanMatchLength.getResult());
    assertGreaterOrEqual(meanMatchLength.getResult(), 5.0);
}
From source file:com.itemanalysis.psychometrics.rasch.JMLE.java
/** * Run the primary JML estimation routines. * * To estimate thetas using known item parameters, the known parameters must be established and updateItems * should be set to false.// w w w . ja v a2s . c om * * Extreme persons and items are updated at the same time as nonextreme items and persons. However, the extreme * items and persons are not counted toward the convergence criterion and they are not used to estimate * parameters for the nonextreme items and persons. * * * @param globalMaxIter * @param globalConvergence maximum change in logits (LCONV in WINSTEPS documentation) * @param updateItems set to true if items are to be updated. Note individual items can be fixed too. * @throws SQLException */ public void update(int globalMaxIter, double globalConvergence, boolean updateItems, boolean updatePersons) { double DELTA = globalConvergence + 1.0; //LCONV maxDelta = new Max(); Mean itemMean = new Mean(); int iter = 0; RatingScaleItem rsi = null; RatingScaleThresholds rst = null; double newDifficulty = 0.0; while (DELTA >= globalConvergence && iter < globalMaxIter) { if (updateItems) { itemMean.clear(); //update items that are not fixed for (VariableName v : items.keySet()) { rsi = items.get(v); if (rsi.fixedParameter()) { itemMean.increment(rsi.getDifficulty()); } else { if (!rsi.extremeItem() && !rsi.droppedItem()) { newDifficulty = updateDifficulty(items.get(v), validRIS(rsi.getColumn()), vMPRIS(rsi.getColumn()), 0.0, DELTA); itemMean.increment(newDifficulty); } } } //update thresholds for (String s : thresholds.keySet()) { rst = thresholds.get(s); if (!rst.extremeThreshold() && !rst.fixedParameter()) updateThresholds(rst); } //accept new thresholds and increment delta. 
Only increments delta for non extreme categories for (String s : thresholds.keySet()) { maxDelta.increment(thresholds.get(s).acceptProposalThresholds()); } //Recenter proposal difficulties, accept proposal difficulties, and increment delta //Extreme items are not recentered, and their change in rho not counted in delta. for (VariableName v : items.keySet()) { rsi = items.get(v); if (!rsi.extremeItem() && !rsi.droppedItem()) { rsi.recenterProposalDifficulty(itemMean.getResult()); maxDelta.increment(rsi.acceptProposalDifficulty()); } /** * Set new threshold rho in RatingScaleItem object */ if (rsi.getNumberOfCategories() > 2) { rsi.setThresholds(thresholds.get(rsi.getGroupId()).getThresholds()); } } } //update persons //Change in person parameter is not counted toward delta. double tDelta = 0.0; if (updatePersons) { for (int i = 0; i < nPeople; i++) { if (!extremePersons[i]) { tDelta = updatePersons(i, validRawScore(data[i]), vMPRS(data[i]), DELTA); maxDelta.increment(tDelta); } } } DELTA = maxDelta.getResult(); maxDelta.clear(); iterationDelta.add(DELTA); //compute residuals for all nonextreme items completed by nonextreme examinees //the residual to compute are the expected score (TCC, iTCC) and observed score (RS, RIS) iter++; } //end JMLE loop }
From source file:com.itemanalysis.psychometrics.irt.estimation.StartingValues.java
/** * Computes normal approximation estimates (PROX) of item difficulty and person ability * in a way that allows for missing data (Linacre, 1994). It is an iterative procedure. * * Linacre, J. M., (1994). PROX with missing data, or known item or person measures. * Rasch Measurement Transactions, 8:3, 378, http://www.rasch.org/rmt/rmt83g.htm. * * @param converge convergence criterion as the maximum change in person logits. * @param maxIter maximum number of iterations. About 10 iterations works well. *//*from www . j av a2s . c om*/ private void prox(double converge, int maxIter) { double delta = 1.0 + converge; int iter = 0; double pProx = 0; double pScore = 0; double maxTestScore = 0; double maxChange = 0; double logit = 0; Mean personGrandMean = new Mean(); StandardDeviation personGrandSd = new StandardDeviation(); double iProx = 0.0; double iMean = 0; theta = new double[nResponseVectors]; Mean[] mPerson = new Mean[nItems];//Item difficulty mean for those examinees completing item j StandardDeviation[] sdPerson = new StandardDeviation[nItems];//Item difficulty standard deviation for those examinees completing item j double[] Si = null; double[] Ni = null; Mean[] mItem = new Mean[nResponseVectors]; StandardDeviation[] sdItem = new StandardDeviation[nResponseVectors]; while (delta > converge && iter < maxIter) { Si = new double[nItems]; Ni = new double[nItems]; //Compute descriptive statistics for persons and items double resp = 0; double freq = 0; for (int l = 0; l < nResponseVectors; l++) { freq = responseVector[l].getFrequency(); for (int j = 0; j < nItems; j++) { //initialize arrays if (l == 0) { mPerson[j] = new Mean(); sdPerson[j] = new StandardDeviation(); } if (j == 0) { mItem[l] = new Mean(); sdItem[l] = new StandardDeviation(); } if (irm[j].getType() == IrmType.L3 || irm[j].getType() == IrmType.L4) { resp = responseVector[l].getResponseAt(j); //increment item and person summary statistics if (resp != -1) { //incorporate weights - crude workaround for 
(int w = 0; w < freq; w++) { mItem[l].increment(irm[j].getDifficulty()); sdItem[l].increment(irm[j].getDifficulty()); mPerson[j].increment(theta[l]); sdPerson[j].increment(theta[l]); Si[j] += resp; Ni[j]++; } } } } //end item loop } //end summary loop //Compute item PROX for binary items only iMean = 0; double pSd = 1e-8; double ni = 0; for (int j = 0; j < nItems; j++) { if (irm[j].getType() == IrmType.L3 || irm[j].getType() == IrmType.L4) { pSd = sdPerson[j].getResult(); //adjust extreme item scores if (Si[j] == 0) Si[j] += 0.3; if (Si[j] == Ni[j]) Si[j] -= 0.3; logit = Math.log(Si[j] / (Ni[j] - Si[j])); iProx = mPerson[j].getResult() - Math.sqrt(1.0 + pSd / 2.9) * logit; irm[j].setDifficulty(iProx); iMean += iProx; ni++; } } iMean /= ni; //center difficulties about the mean item difficulty for (int j = 0; j < nItems; j++) { if (irm[j].getType() == IrmType.L3 || irm[j].getType() == IrmType.L4) { iProx = irm[j].getDifficulty(); irm[j].setDifficulty(iProx - iMean); } } //Compute person PROX maxChange = 0; personGrandMean.clear(); personGrandSd.clear(); Pair<Double, Double> personScores = null; for (int l = 0; l < nResponseVectors; l++) { personScores = computePersonScores(responseVector[l]); pScore = personScores.getFirst(); maxTestScore = personScores.getSecond(); //adjust extreme person scores if (pScore == 0) pScore += 0.3; if (pScore == maxTestScore) pScore -= 0.3; logit = Math.log(pScore / (maxTestScore - pScore)); pProx = mItem[l].getResult() + Math.sqrt(1.0 + sdItem[l].getResult() / 2.9) * logit; maxChange = Math.max(maxChange, Math.abs(theta[l] - pProx)); theta[l] = pProx; personGrandMean.increment(pProx); personGrandSd.increment(pProx); } delta = maxChange; iter++; fireEMStatusEvent(iter, delta, Double.NaN); } //end while //Linearly transform theta estimate to have a mean of 0 and a standard deviation of 1. //Apply the same transformation to item difficulty values. 
double A = 1.0 / personGrandSd.getResult(); double B = -A * personGrandMean.getResult(); for (int l = 0; l < nResponseVectors; l++) { theta[l] = theta[l] * A + B; } double a = 1; double b = 0; for (int j = 0; j < nItems; j++) { if (irm[j].getType() == IrmType.L3 || irm[j].getType() == IrmType.L3) { b = irm[j].getDifficulty(); irm[j].setDifficulty(b * A + B); //Adjust discrimination parameter for scaling constant. //PROX assumes a logit scale. This conversion is to convert to the normal metric. a = irm[j].getDiscrimination(); irm[j].setDiscrimination(a / irm[j].getScalingConstant()); } } //For debugging // System.out.println("ITER: " + iter); // for(int j=0;j<nItems;j++){ // System.out.println("PROX: " + irm[j].toString()); // } }
From source file:nl.systemsgenetics.genenetworkbackend.div.CalculateGenePredictability.java
/**
 * Computes per-gene summary statistics over the prediction scores of the significant terms and
 * writes them to a tab-separated file.
 *
 * <p>For each gene (row) the output holds: a z-score from {@code MannWhitneyUTest2} comparing
 * the scores of the terms the gene is not annotated to against the scores of the terms it is
 * annotated to (only when the gene has at least 10 annotated terms, otherwise {@code NaN}),
 * the skewness and kurtosis of all its scores, the mean score over non-annotated and over
 * annotated terms, and the number of annotated terms.
 *
 * <p>The program prints a message to standard error and stops without writing anything when
 * the two matrices do not have identical column or row identifiers after selecting the
 * significant terms.
 *
 * @param args the command line arguments (not read; all paths are hard-coded)
 * @throws IOException if reading the inputs or writing the output fails
 */
public static void main(String[] args) throws IOException {

    // Paths for the Reactome run. Commented-out variants of these four paths existed for the
    // hpo, go_F, kegg and go_P prediction sets; swap the file names to rerun on those.
    File predictionMatrixFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\reactome_predictions.txt.gz");
    File annotationMatrixFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\PathwayMatrix\\Ensembl2Reactome_All_Levels.txt_matrix.txt.gz");
    File significantTermsFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\reactome_predictions_bonSigTerms_alsoInGoP.txt");
    File outputFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\reactome_predictions_genePredictability_alsoInGoP.txt");

    LinkedHashSet<String> significantTerms = loadSignificantTerms(significantTermsFile);

    DoubleMatrixDataset<String, String> predictionMatrix = DoubleMatrixDataset
            .loadDoubleData(predictionMatrixFile.getAbsolutePath());
    DoubleMatrixDataset<String, String> annotationMatrix = DoubleMatrixDataset
            .loadDoubleData(annotationMatrixFile.getAbsolutePath());

    DoubleMatrixDataset<String, String> predictionMatrixSignificant = predictionMatrix
            .viewColSelection(significantTerms);
    DoubleMatrixDataset<String, String> annotationMatrixSignificant = annotationMatrix
            .viewColSelection(significantTerms);

    // Rows and columns are addressed by index below, so both matrices must line up exactly.
    if (!predictionMatrixSignificant.getColObjects().equals(annotationMatrixSignificant.getColObjects())) {
        System.err.println("Differnce in terms");
        return;
    }

    if (!predictionMatrixSignificant.getRowObjects().equals(annotationMatrixSignificant.getRowObjects())) {
        System.err.println("Differnce in genes");
        return;
    }

    MannWhitneyUTest2 uTest = new MannWhitneyUTest2();
    Kurtosis kurtosisCalculator = new Kurtosis();
    Skewness skewnessCalculator = new Skewness();
    Mean annotatedMeanCalculator = new Mean();
    Mean notAnnotatedMeanCalculator = new Mean();

    final int geneCount = predictionMatrixSignificant.rows();

    double[] genePredictabilityZscores = new double[geneCount];
    int[] pathwayCount = new int[geneCount];
    double[] geneKurtosis = new double[geneCount];
    double[] geneSkewness = new double[geneCount];
    double[] geneAnnotatedMean = new double[geneCount];
    double[] geneNotAnnotatedMean = new double[geneCount];

    for (int g = 0; g < geneCount; g++) {

        // The calculators are shared across genes, so reset them for each row.
        kurtosisCalculator.clear();
        skewnessCalculator.clear();
        annotatedMeanCalculator.clear();
        notAnnotatedMeanCalculator.clear();

        DoubleMatrix1D geneAnnotations = annotationMatrixSignificant.getRow(g);

        int geneAnnotationCount = geneAnnotations.cardinality();

        pathwayCount[g] = geneAnnotationCount;

        double[] zScoresAnnotatedPathways = new double[geneAnnotationCount];
        double[] zScoresOtherPathways = new double[annotationMatrixSignificant.columns() - geneAnnotationCount];

        int x = 0;
        int y = 0;

        // Split this gene's scores by whether the annotation entry for the term is non-zero.
        for (int p = 0; p < geneAnnotations.size(); p++) {

            double z = predictionMatrixSignificant.getElementQuick(g, p);

            if (geneAnnotations.getQuick(p) != 0) {
                annotatedMeanCalculator.increment(z);
                zScoresAnnotatedPathways[x++] = z;
            } else {
                notAnnotatedMeanCalculator.increment(z);
                zScoresOtherPathways[y++] = z;
            }
            kurtosisCalculator.increment(z);
            skewnessCalculator.increment(z);
        }

        // The U test is only run for genes with at least 10 annotated terms.
        if (geneAnnotationCount >= 10) {
            uTest.setData(zScoresOtherPathways, zScoresAnnotatedPathways);
            genePredictabilityZscores[g] = uTest.getZ();
        } else {
            genePredictabilityZscores[g] = Double.NaN;
        }

        geneKurtosis[g] = kurtosisCalculator.getResult();
        geneSkewness[g] = skewnessCalculator.getResult();
        geneAnnotatedMean[g] = annotatedMeanCalculator.getResult();
        geneNotAnnotatedMean[g] = notAnnotatedMeanCalculator.getResult();
    }

    // try-with-resources: the writer (and the underlying file) is closed even when a write fails.
    try (CSVWriter writer = new CSVWriter(new FileWriter(outputFile), '\t', '\0', '\0', "\n")) {

        String[] outputLine = new String[7];
        int c = 0;
        outputLine[c++] = "Gene";
        outputLine[c++] = "Z-score";
        outputLine[c++] = "Skewness";
        outputLine[c++] = "Kurtosis";
        outputLine[c++] = "MeanNotAnnotated";
        outputLine[c++] = "MeanAnnotated";
        outputLine[c++] = "Annoted_pathways";
        writer.writeNext(outputLine);

        ArrayList<String> geneNames = predictionMatrixSignificant.getRowObjects();
        for (int g = 0; g < geneCount; g++) {
            c = 0;
            outputLine[c++] = geneNames.get(g);
            outputLine[c++] = String.valueOf(genePredictabilityZscores[g]);
            outputLine[c++] = String.valueOf(geneSkewness[g]);
            outputLine[c++] = String.valueOf(geneKurtosis[g]);
            outputLine[c++] = String.valueOf(geneNotAnnotatedMean[g]);
            outputLine[c++] = String.valueOf(geneAnnotatedMean[g]);
            outputLine[c++] = String.valueOf(pathwayCount[g]);
            writer.writeNext(outputLine);
        }
    }
}
From source file:nl.systemsgenetics.genenetworkbackend.hpo.TestDiseaseGenePerformance.java
/**
 * Computes, for every pathway (column), the mean and standard deviation of the prediction
 * values of the genes annotated to that pathway.
 *
 * <p>A gene counts as annotated when its entry in the pathway's annotation column is non-zero.
 * Both matrices are addressed with the same gene index, so they are expected to share row
 * order.
 *
 * @param predictionMatrixSignificant prediction values, genes in rows and pathways in columns
 * @param annotationMatrixSignificant annotation values with the same pathways as columns
 * @return map from pathway identifier to the mean and standard deviation of its annotated genes
 */
private static HashMap<String, MeanSd> calculatePathayMeansOfAnnotatedGenes(
        DoubleMatrixDataset<String, String> predictionMatrixSignificant,
        DoubleMatrixDataset<String, String> annotationMatrixSignificant) {

    HashMap<String, MeanSd> pathwayMeanSdMap = new HashMap<>(predictionMatrixSignificant.columns());

    Mean meanCalculator = new Mean();
    Variance varianceCalculator = new Variance();

    for (String pathway : predictionMatrixSignificant.getColObjects()) {

        // The calculators are shared across pathways, so reset them for each column.
        meanCalculator.clear();
        varianceCalculator.clear();

        DoubleMatrix1D pathwayPredictions = predictionMatrixSignificant.getCol(pathway);
        DoubleMatrix1D pathwayAnnotations = annotationMatrixSignificant.getCol(pathway);

        for (int g = 0; g < pathwayPredictions.size(); ++g) {
            if (pathwayAnnotations.get(g) != 0) {
                double prediction = pathwayPredictions.getQuick(g);
                meanCalculator.increment(prediction);
                varianceCalculator.increment(prediction);
            }
        }

        // Fix: the standard deviation is the square root of the variance. The previous code
        // passed variance * variance. NOTE(review): MeanSd is assumed to take
        // (mean, standard deviation); confirm against its constructor.
        double sd = Math.sqrt(varianceCalculator.getResult());

        pathwayMeanSdMap.put(pathway, new MeanSd(meanCalculator.getResult(), sd));
    }

    return pathwayMeanSdMap;
}