Example usage for the org.apache.commons.math3.stat.correlation.PearsonsCorrelation constructor

Introduction

On this page you can find example usage of the org.apache.commons.math3.stat.correlation.PearsonsCorrelation constructor.

Prototype

public PearsonsCorrelation(Covariance covariance)

Source Link

Document

Create a PearsonsCorrelation from a Covariance.

Usage

From source file:GeMSE.GS.Analysis.Stats.OneSamplePearsonsCorrelationPanel.java

/**
 * Builds a {@link PearsonsCorrelation} from {@code _data}, stores it in
 * {@code _pearsonCorrelation}, and hands the resulting correlation matrix
 * to {@code Plot} and {@code GridView}. Does nothing when {@code _data} is null.
 */
private void RunAnalysis() {
    if (_data != null) {
        _pearsonCorrelation = new PearsonsCorrelation(_data);
        // extract the correlation matrix as a plain 2-D array for the two views
        double[][] correlations = _pearsonCorrelation.getCorrelationMatrix().getData();
        Plot(correlations);
        GridView(correlations);
    }
}

From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step6GraphTransitivityCleaner.java

/**
 * Processes one XML file of annotated argument pairs: adds the pairs one by one to a
 * graph, skips every pair whose addition produces a cycle, and writes several report
 * files into {@code outputDir}.
 * <p>
 * Files written (all named from {@code prefix + file.getName()}):
 * <ul>
 * <li>{@code ..._table.csv} - one tab-separated row per processed pair</li>
 * <li>{@code ..._info.txt} - size summary and the Pearson correlation between edge weight
 * and the "pair was rejected" indicator</li>
 * <li>the bare name - XML with the pairs that were retained</li>
 * <li>{@code generated_...} - only when {@code collectGeneratedArgumentPairs} is true: XML with
 * pairs derived from paths in the final graph, plus their reversed counterparts</li>
 * <li>{@code ....dgs} - a cleaned copy of the last graph that was built</li>
 * </ul>
 *
 * @param file                          input XML file with a list of {@code AnnotatedArgumentPair}
 * @param outputDir                     directory that receives all output files
 * @param prefix                        prefix prepended to every output file name
 * @param collectGeneratedArgumentPairs whether to also write the generated pairs;
 *                                      {@code null} is treated as {@code false}
 * @return collected statistics about the cleaning run
 * @throws IllegalStateException if no graph was built (fewer than two pairs after filtering)
 *                               or an argument of the final graph cannot be found
 * @throws Exception             on any I/O or processing failure
 */
public GraphCleaningResults processSingleFile(File file, File outputDir, String prefix,
        Boolean collectGeneratedArgumentPairs) throws Exception {
    GraphCleaningResults result = new GraphCleaningResults();

    File outFileTable = new File(outputDir, prefix + file.getName() + "_table.csv");
    File outFileInfo = new File(outputDir, prefix + file.getName() + "_info.txt");

    // state produced while the report streams are open but still needed after they are closed
    List<AnnotatedArgumentPair> pairs;
    int fullDataSize;
    int preFilteredDataSize;

    // store the indices of all pairs (edges) that have been successfully added without
    // generating cycles
    TreeSet<Integer> addedPairsIndices = new TreeSet<>();

    Graph lastGraph = null;

    // try-with-resources guarantees both report streams are closed even if processing fails
    // (resources are closed in reverse order: psInfo first, then psTable)
    try (PrintStream psTable = new PrintStream(new FileOutputStream(outFileTable));
            PrintStream psInfo = new PrintStream(new FileOutputStream(outFileInfo))) {

        // load one topic/side
        pairs = new ArrayList<>((List<AnnotatedArgumentPair>) XStreamTools.getXStream().fromXML(file));

        fullDataSize = pairs.size();

        // filter out missing gold data
        Iterator<AnnotatedArgumentPair> iterator = pairs.iterator();
        while (iterator.hasNext()) {
            AnnotatedArgumentPair pair = iterator.next();
            if (pair.getGoldLabel() == null) {
                iterator.remove();
            }
            // or we want to completely remove equal edges in advance!
            else if (this.removeEqualEdgesParam && "equal".equals(pair.getGoldLabel())) {
                iterator.remove();
            }
        }

        // sort pairs by their weight
        this.argumentPairListSorter.sortArgumentPairs(pairs);

        preFilteredDataSize = pairs.size();

        // compute correlation between score threshold and number of removed edges
        double[] correlationEdgeWeights = new double[pairs.size()];
        double[] correlationRemovedEdges = new double[pairs.size()];

        // only cycles of length 0 to 5 are interesting (5+ are too big)
        Range<Integer> range = Range.between(0, 5);

        psTable.print(
                "EdgeWeightThreshold\tPairs\tignoredEdgesCount\tIsDAG\tTransitivityScoreMean\tTransitivityScoreMax\tTransitivityScoreSamples\tEdges\tNodes\t");
        for (int j = range.getMinimum(); j <= range.getMaximum(); j++) {
            psTable.print("Cycles_" + j + "\t");
        }
        psTable.println();

        // number of edges ignored as they generated cycles
        int ignoredEdgesCount = 0;

        // flag that the first cycle was already processed
        boolean firstCycleAlreadyHit = false;

        // NOTE(review): iteration starts at index 1, so pairs.get(0) is never added to the
        // graph and index 0 of both correlation arrays keeps its default 0.0 - confirm intended
        for (int i = 1; i < pairs.size(); i++) {
            // now filter the pairs and add only those that have not generated cycles
            List<AnnotatedArgumentPair> subList = new ArrayList<>();

            for (Integer index : addedPairsIndices) {
                subList.add(pairs.get(index));
            }

            // and add the current at the end
            subList.add(pairs.get(i));

            // what is the current lowest value of a pair weight?
            double weakestEdgeWeight = computeEdgeWeight(subList.get(subList.size() - 1), LAMBDA_PENALTY);

            // map for storing cycles by their length
            TreeMap<Integer, TreeSet<String>> lengthCyclesMap = new TreeMap<>();

            Graph graph = buildGraphFromArgumentPairs(subList);

            lastGraph = graph;

            List<List<Object>> cyclesInGraph = findCyclesInGraph(graph);

            // stays empty (mean = NaN) whenever the current graph contains a cycle
            DescriptiveStatistics transitivityScore = new DescriptiveStatistics();

            if (cyclesInGraph.isEmpty()) {
                // we have DAG
                transitivityScore = computeTransitivityScores(graph);

                // update results
                result.maxTransitivityScore = (int) transitivityScore.getMax();
                result.avgTransitivityScore = transitivityScore.getMean();
            }

            int numberOfLoops = cyclesInGraph.size();

            // initialize map
            for (int r = range.getMinimum(); r <= range.getMaximum(); r++) {
                lengthCyclesMap.put(r, new TreeSet<String>());
            }

            // we hit a loop
            if (numberOfLoops > 0) {
                // let's update the result

                if (!firstCycleAlreadyHit) {
                    result.graphSizeEdgesBeforeFirstCycle = graph.getEdgeCount();
                    result.graphSizeNodesBeforeFirstCycle = graph.getNodeCount();

                    // find the shortest cycle
                    int shortestCycleLength = Integer.MAX_VALUE;

                    for (List<Object> cycle : cyclesInGraph) {
                        shortestCycleLength = Math.min(shortestCycleLength, cycle.size());
                    }
                    result.lengthOfFirstCircle = shortestCycleLength;

                    result.pairsBeforeFirstCycle = i;

                    firstCycleAlreadyHit = true;
                }

                // ignore this edge further
                ignoredEdgesCount++;

                // update counts of different cycles lengths
                for (List<Object> cycle : cyclesInGraph) {
                    int currentSize = cycle.size();

                    // convert to sorted list of node IDs so the same cycle is counted only once
                    List<String> cycleAsSortedIDs = new ArrayList<>();
                    for (Object o : cycle) {
                        cycleAsSortedIDs.add(o.toString());
                    }
                    Collections.sort(cycleAsSortedIDs);

                    if (range.contains(currentSize)) {
                        lengthCyclesMap.get(currentSize).add(cycleAsSortedIDs.toString());
                    }
                }
            } else {
                addedPairsIndices.add(i);
            }

            // collect loop sizes
            StringBuilder loopsAsString = new StringBuilder();
            for (int j = range.getMinimum(); j <= range.getMaximum(); j++) {
                loopsAsString.append(lengthCyclesMap.get(j).size());
                loopsAsString.append("\t");
            }

            psTable.printf(Locale.ENGLISH, "%.4f\t%d\t%d\t%b\t%.2f\t%d\t%d\t%d\t%d\t%s%n", weakestEdgeWeight, i,
                    ignoredEdgesCount, numberOfLoops == 0,
                    Double.isNaN(transitivityScore.getMean()) ? 0d : transitivityScore.getMean(),
                    (int) transitivityScore.getMax(), transitivityScore.getN(), graph.getEdgeCount(),
                    graph.getNodeCount(), loopsAsString.toString().trim());

            // update result
            result.finalGraphSizeEdges = graph.getEdgeCount();
            result.finalGraphSizeNodes = graph.getNodeCount();
            result.ignoredEdgesThatBrokeDAG = ignoredEdgesCount;

            // update stats for correlation
            correlationEdgeWeights[i] = weakestEdgeWeight;
            // indicator variable: if we keep = 0, if we remove = 1
            correlationRemovedEdges[i] = numberOfLoops == 0 ? 0.0 : 1.0;
        }

        psInfo.println("Original: " + fullDataSize + ", removed by MACE: " + (fullDataSize - preFilteredDataSize)
                + ", final: " + (preFilteredDataSize - ignoredEdgesCount) + " (removed: " + ignoredEdgesCount
                + ")");

        // two-column data matrix: column 0 = edge weight, column 1 = removed indicator
        double[][] matrix = new double[correlationEdgeWeights.length][];
        for (int i = 0; i < correlationEdgeWeights.length; i++) {
            matrix[i] = new double[2];
            matrix[i][0] = correlationEdgeWeights[i];
            matrix[i][1] = correlationRemovedEdges[i];
        }

        PearsonsCorrelation pearsonsCorrelation = new PearsonsCorrelation(matrix);

        double pValue = pearsonsCorrelation.getCorrelationPValues().getEntry(0, 1);
        double correlation = pearsonsCorrelation.getCorrelationMatrix().getEntry(0, 1);

        psInfo.printf(Locale.ENGLISH, "Correlation: %.3f, p-Value: %.4f%n", correlation, pValue);
        if (lastGraph == null) {
            throw new IllegalStateException("Graph is null");
        }
    }

    // save filtered final gold data
    List<AnnotatedArgumentPair> finalArgumentPairList = new ArrayList<>();

    for (Integer index : addedPairsIndices) {
        finalArgumentPairList.add(pairs.get(index));
    }
    XStreamTools.toXML(finalArgumentPairList, new File(outputDir, prefix + file.getName()));

    // TODO: here, we can add newly generated edges from graph transitivity
    // Boolean.TRUE.equals avoids a NullPointerException from auto-unboxing a null flag
    if (Boolean.TRUE.equals(collectGeneratedArgumentPairs)) {
        Set<GeneratedArgumentPair> generatedArgumentPairs = new HashSet<>();
        // collect all arguments
        Map<String, Argument> allArguments = new HashMap<>();
        for (ArgumentPair argumentPair : pairs) {
            allArguments.put(argumentPair.getArg1().getId(), argumentPair.getArg1());
            allArguments.put(argumentPair.getArg2().getId(), argumentPair.getArg2());
        }

        Graph finalGraph = buildGraphFromArgumentPairs(finalArgumentPairList);
        for (Edge e : finalGraph.getEdgeSet()) {
            e.setAttribute(WEIGHT, 1.0);
        }

        for (Node j : finalGraph) {
            // the shortest-path computation depends only on the source node j,
            // so run it once per source instead of once per (j, k) pair
            BellmanFord bfShortest = new BellmanFord(WEIGHT, j.getId());
            bfShortest.init(finalGraph);
            bfShortest.compute();

            for (Node k : finalGraph) {
                if (j != k) {
                    // is there a path between?
                    Path shortestPath = bfShortest.getShortestPath(k);

                    if (shortestPath.size() > 0) {
                        // we have a path
                        GeneratedArgumentPair ap = new GeneratedArgumentPair();
                        Argument arg1 = allArguments.get(j.getId());

                        if (arg1 == null) {
                            throw new IllegalStateException("Cannot find argument " + j.getId());
                        }
                        ap.setArg1(arg1);

                        Argument arg2 = allArguments.get(k.getId());

                        if (arg2 == null) {
                            throw new IllegalStateException("Cannot find argument " + k.getId());
                        }
                        ap.setArg2(arg2);

                        ap.setGoldLabel("a1");
                        generatedArgumentPairs.add(ap);
                    }
                }
            }
        }
        // and now add the reverse ones
        Set<GeneratedArgumentPair> generatedReversePairs = new HashSet<>();
        for (GeneratedArgumentPair pair : generatedArgumentPairs) {
            GeneratedArgumentPair ap = new GeneratedArgumentPair();
            ap.setArg1(pair.getArg2());
            ap.setArg2(pair.getArg1());
            ap.setGoldLabel("a2");
            generatedReversePairs.add(ap);
        }
        generatedArgumentPairs.addAll(generatedReversePairs);
        // and save it
        XStreamTools.toXML(generatedArgumentPairs, new File(outputDir, "generated_" + prefix + file.getName()));
    }

    result.fullPairsSize = fullDataSize;
    result.removedApriori = (fullDataSize - preFilteredDataSize);
    result.finalPairsRetained = finalArgumentPairList.size();

    // save the final graph
    Graph outGraph = cleanCopyGraph(lastGraph);
    FileSinkDGS dgs1 = new FileSinkDGS();
    File outFile = new File(outputDir, prefix + file.getName() + ".dgs");

    System.out.println("Saved to " + outFile);
    // try-with-resources closes the writer even when writeAll fails
    try (FileWriter w1 = new FileWriter(outFile)) {
        dgs1.writeAll(outGraph, w1);
    }

    return result;
}

From source file:org.meteoinfo.math.stats.StatsUtil.java

/**
 * Computes the Pearson correlation coefficient between two data series
 * together with its p-value.
 * <p>
 * The number of samples is taken from the first dimension of {@code x};
 * element {@code i} of {@code x} is paired with element {@code i} of {@code y}.
 *
 * @param x X data
 * @param y Y data
 * @return two-element array: the correlation coefficient followed by the p-value
 */
public static double[] pearsonr(Array x, Array y) {
    final int sampleCount = x.getShape()[0];

    // one row per sample: column 0 holds x, column 1 holds y
    final double[][] samples = new double[sampleCount][2];
    for (int row = 0; row < sampleCount; row++) {
        samples[row][0] = x.getDouble(row);
        samples[row][1] = y.getDouble(row);
    }

    // copyArray = false: wrap the freshly built array without duplicating it
    final PearsonsCorrelation pearson = new PearsonsCorrelation(new Array2DRowRealMatrix(samples, false));
    final double coefficient = pearson.getCorrelationMatrix().getEntry(0, 1);
    final double significance = pearson.getCorrelationPValues().getEntry(0, 1);
    return new double[] { coefficient, significance };
}

From source file:restclient.service.RecordFacadeREST.java

/**
 * Returns the Pearson correlation (and its p-value) between the {@code plevel} value of a
 * user's records and one weather variable, over an inclusive date range.
 * <p>
 * Records are selected by user id and date range, ordered by date ascending. The weather
 * variable is chosen by {@code wv}: {@code "temperature"}, {@code "humidity"} or
 * {@code "windspeed"}; any other value selects pressure.
 *
 * @param uid   id of the user whose records are analysed
 * @param date1 start date (inclusive), formatted as {@code yyyy-MM-dd}
 * @param date2 end date (inclusive), formatted as {@code yyyy-MM-dd}
 * @param wv    name of the weather variable to correlate against
 * @return a single-element list whose entry carries the correlation coefficient
 *         ({@code rvalue}) and the p-value ({@code svalue})
 * @throws ParseException if either date does not match {@code yyyy-MM-dd}
 */
@GET
@Path("findCorrelation/{uid}/{sdate}/{edate}/{wvariable}")
@Produces({ "application/json" })
public List<Correlation> findCorrelation(@PathParam("uid") Integer uid, @PathParam("sdate") String date1,
        @PathParam("edate") String date2, @PathParam("wvariable") String wv) throws ParseException {
    // SimpleDateFormat is not thread-safe, so a fresh instance is created per request
    SimpleDateFormat sdf1 = new SimpleDateFormat("yyyy-MM-dd");
    Date sdate = sdf1.parse(date1);
    Date edate = sdf1.parse(date2);

    TypedQuery<Record> q = em.createQuery(
            "SELECT r FROM Record r WHERE r.date >= :sdate AND r.date <= :edate AND r.uid.uid = :uid order by r.date ASC",
            Record.class);
    q.setParameter("uid", uid);
    q.setParameter("sdate", sdate);
    q.setParameter("edate", edate);
    List<Record> qr = q.getResultList();

    // one row per record: column 0 = plevel, column 1 = the selected weather variable
    double data[][] = new double[qr.size()][2];
    for (int i = 0; i < qr.size(); ++i) {
        Record r = qr.get(i);
        data[i][0] = r.getPlevel();
        if (wv.equals("temperature")) {
            data[i][1] = r.getTemp();
        } else if (wv.equals("humidity")) {
            data[i][1] = r.getHumidity();
        } else if (wv.equals("windspeed")) {
            data[i][1] = r.getWindspeed();
        } else {
            // any unrecognised variable name falls back to pressure
            data[i][1] = r.getPressure();
        }
    }

    RealMatrix m = MatrixUtils.createRealMatrix(data);
    PearsonsCorrelation pc = new PearsonsCorrelation(m);

    // entry (0, 1) relates column 0 (plevel) to column 1 (weather variable)
    Correlation cl = new Correlation();
    cl.setRvalue(pc.getCorrelationMatrix().getEntry(0, 1));
    cl.setSvalue(pc.getCorrelationPValues().getEntry(0, 1));

    List<Correlation> re = new ArrayList<Correlation>();
    re.add(cl);
    return re;
}

From source file:sanger.team16.common.stats.SimpleLinear.java

/**
 * Computes the p-value of the Pearson correlation between {@code sampleX}
 * and {@code sampleY}, using the first {@code n} elements of each.
 *
 * @return the p-value at entry (0, 1) of the correlation p-value matrix
 */
public double getCorrelationPValue() {
    // one row per observation: { x, y }
    final double[][] observations = new double[n][];
    for (int row = 0; row < n; row++) {
        observations[row] = new double[] { sampleX[row], sampleY[row] };
    }

    final PearsonsCorrelation pearson = new PearsonsCorrelation(observations);
    return pearson.getCorrelationPValues().getEntry(0, 1);
}

From source file:sanger.team16.common.stats.SimpleLinear.java

/**
 * Runs a permutation test: repeatedly shuffles the pairing between
 * {@code sampleX} and {@code sampleY} and counts how often the Pearson
 * p-value of the shuffled data is less than or equal to {@code nominalP}.
 *
 * @param permutation number of shuffles to perform
 * @param nominalP    p-value threshold the shuffled p-values are compared with
 * @return fraction of shuffles whose p-value was at most {@code nominalP}
 */
public double permute(int permutation, double nominalP) {
    final List<Integer> shuffledOrder = initIndex();
    int atOrBelowNominal = 0;

    for (int round = 0; round < permutation; round++) {
        Collections.shuffle(shuffledOrder);

        // pair each x with the y selected by the shuffled index list
        final double[][] observations = new double[n][];
        for (int row = 0; row < n; row++) {
            observations[row] = new double[] { sampleX[row], sampleY[shuffledOrder.get(row)] };
        }

        final PearsonsCorrelation pearson = new PearsonsCorrelation(observations);
        final double shuffledP = pearson.getCorrelationPValues().getEntry(0, 1);
        if (shuffledP <= nominalP) {
            atOrBelowNominal++;
        }
    }

    return (double) atOrBelowNominal / permutation;
}

From source file:sanger.team16.common.stats.SpearmansRank.java

/**
 * Computes the p-value of the Pearson correlation between {@code rankX}
 * and {@code rankY}, using the first {@code n} elements of each.
 *
 * @return the p-value at entry (0, 1) of the correlation p-value matrix
 */
public double getCorrelationPValue() {
    // one row per observation: { rank of x, rank of y }
    final double[][] rankedPairs = new double[n][];
    for (int row = 0; row < n; row++) {
        rankedPairs[row] = new double[] { rankX[row], rankY[row] };
    }

    final PearsonsCorrelation pearson = new PearsonsCorrelation(rankedPairs);
    return pearson.getCorrelationPValues().getEntry(0, 1);
}

From source file:sanger.team16.common.stats.SpearmansRank.java

/**
 * Runs a permutation test on the ranked data: repeatedly shuffles the pairing
 * between {@code rankX} and {@code rankY} and counts how often the Pearson
 * p-value of the shuffled ranks is less than or equal to {@code nominalP}.
 *
 * @param permutation number of shuffles to perform
 * @param nominalP    p-value threshold the shuffled p-values are compared with
 * @return fraction of shuffles whose p-value was at most {@code nominalP}
 */
public double permute(int permutation, double nominalP) {
    final List<Integer> shuffledOrder = initIndex();
    int atOrBelowNominal = 0;

    for (int round = 0; round < permutation; round++) {
        Collections.shuffle(shuffledOrder);

        // pair each x rank with the y rank selected by the shuffled index list
        final double[][] rankedPairs = new double[n][];
        for (int row = 0; row < n; row++) {
            rankedPairs[row] = new double[] { rankX[row], rankY[shuffledOrder.get(row)] };
        }

        final PearsonsCorrelation pearson = new PearsonsCorrelation(rankedPairs);
        final double shuffledP = pearson.getCorrelationPValues().getEntry(0, 1);
        if (shuffledP <= nominalP) {
            atOrBelowNominal++;
        }
    }

    return (double) atOrBelowNominal / permutation;
}

From source file:stats.SpearmansCorrelation.java

/**
 * Builds a SpearmansCorrelation from an input data matrix and a ranking
 * algorithm.
 * <p>
 * The data matrix is rank-transformed and a {@link PearsonsCorrelation} is
 * then computed over the ranked data.
 * <p>
 * From version 4.0 onwards this constructor will throw an exception if the
 * provided {@link NaturalRanking} uses a {@link NaNStrategy#REMOVED}
 * strategy.
 *
 * @param dataMatrix
 *          matrix of data with columns representing variables to correlate
 * @param rankingAlgorithm
 *          ranking algorithm
 */
public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm) {
    // the ranking algorithm is stored first, before the data is rank-transformed
    this.rankingAlgorithm = rankingAlgorithm;
    this.data = rankTransform(dataMatrix);
    this.rankCorrelation = new PearsonsCorrelation(this.data);
}