Example usage for the org.apache.commons.math3.stat.correlation.PearsonsCorrelation constructor

List of usage examples for the org.apache.commons.math3.stat.correlation.PearsonsCorrelation constructor

Introduction

On this page you can find example usages of the org.apache.commons.math3.stat.correlation.PearsonsCorrelation constructor.

Prototype

public PearsonsCorrelation(Covariance covariance) 

Source Link

Document

Create a PearsonsCorrelation from a Covariance.

Usage

From source file:GeMSE.GS.Analysis.Stats.OneSamplePearsonsCorrelationPanel.java

/**
 * Computes the Pearson correlation matrix of {@code _data} and hands it to both the plot and
 * the grid view. Does nothing when no data has been set.
 */
private void RunAnalysis() {
    if (_data != null) {
        _pearsonCorrelation = new PearsonsCorrelation(_data);
        // the same correlation matrix feeds both views
        final double[][] correlations = _pearsonCorrelation.getCorrelationMatrix().getData();
        Plot(correlations);
        GridView(correlations);
    }
}

From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step6GraphTransitivityCleaner.java

/**
 * Cleans the annotated argument pairs stored in {@code file} by building a graph edge by edge
 * and dropping every pair whose addition introduces a cycle.
 * <p>
 * Pairs without a gold label (and, if {@code removeEqualEdgesParam} is set, pairs labelled
 * {@code "equal"}) are discarded first; the remaining pairs are ordered by the configured
 * sorter and then added one at a time. The following files are written to {@code outputDir}:
 * <ul>
 * <li>{@code prefix + name + "_table.csv"} - one tab-separated row per processed pair</li>
 * <li>{@code prefix + name + "_info.txt"} - size summary and the Pearson correlation between
 * edge weight and whether the edge was removed</li>
 * <li>{@code prefix + name} - the retained pairs, serialized as XML</li>
 * <li>{@code "generated_" + prefix + name} - pairs derived from graph reachability (only when
 * {@code collectGeneratedArgumentPairs} is {@code true})</li>
 * <li>{@code prefix + name + ".dgs"} - a cleaned copy of the last graph that was built</li>
 * </ul>
 *
 * @param file                          XML file with the annotated argument pairs
 * @param outputDir                     directory that receives all output files
 * @param prefix                        prefix prepended to every output file name
 * @param collectGeneratedArgumentPairs whether to derive and store additional pairs from
 *                                      paths in the final graph; {@code null} is treated as
 *                                      {@code false}
 * @return statistics collected while cleaning the graph
 * @throws IllegalStateException if no graph was built or a graph node has no matching argument
 * @throws Exception             if reading the input or writing any output fails
 */
public GraphCleaningResults processSingleFile(File file, File outputDir, String prefix,
        Boolean collectGeneratedArgumentPairs) throws Exception {
    GraphCleaningResults result = new GraphCleaningResults();

    File outFileTable = new File(outputDir, prefix + file.getName() + "_table.csv");
    File outFileInfo = new File(outputDir, prefix + file.getName() + "_info.txt");

    // state produced while the report streams are open but still needed after they are closed
    List<AnnotatedArgumentPair> pairs;
    int fullDataSize;
    int preFilteredDataSize;

    // store the indices of all pairs (edges) that have been successfully added without
    // generating cycles
    TreeSet<Integer> addedPairsIndices = new TreeSet<>();

    Graph lastGraph = null;

    // try-with-resources guarantees both report streams are closed even if processing fails
    try (PrintStream psTable = new PrintStream(new FileOutputStream(outFileTable));
            PrintStream psInfo = new PrintStream(new FileOutputStream(outFileInfo))) {

        // load one topic/side
        pairs = new ArrayList<>(
                (List<AnnotatedArgumentPair>) XStreamTools.getXStream().fromXML(file));

        fullDataSize = pairs.size();

        // filter out missing gold data
        Iterator<AnnotatedArgumentPair> iterator = pairs.iterator();
        while (iterator.hasNext()) {
            AnnotatedArgumentPair pair = iterator.next();
            if (pair.getGoldLabel() == null) {
                iterator.remove();
            }
            // or we want to completely remove equal edges in advance!
            else if (this.removeEqualEdgesParam && "equal".equals(pair.getGoldLabel())) {
                iterator.remove();
            }
        }

        // sort pairs by their weight
        this.argumentPairListSorter.sortArgumentPairs(pairs);

        preFilteredDataSize = pairs.size();

        // compute correlation between score threshold and number of removed edges
        double[] correlationEdgeWeights = new double[pairs.size()];
        double[] correlationRemovedEdges = new double[pairs.size()];

        // only cycles of length 0 to 5 are interesting (5+ are too big)
        Range<Integer> range = Range.between(0, 5);

        psTable.print(
                "EdgeWeightThreshold\tPairs\tignoredEdgesCount\tIsDAG\tTransitivityScoreMean\tTransitivityScoreMax\tTransitivityScoreSamples\tEdges\tNodes\t");
        for (int j = range.getMinimum(); j <= range.getMaximum(); j++) {
            psTable.print("Cycles_" + j + "\t");
        }
        psTable.println();

        // number of edges ignored as they generated cycles
        int ignoredEdgesCount = 0;

        // flag that the first cycle was already processed
        boolean firstCycleAlreadyHit = false;

        // NOTE(review): the loop starts at index 1, so pairs.get(0) is never added to the graph
        // and row 0 of the correlation input below stays (0, 0) - confirm this is intended
        for (int i = 1; i < pairs.size(); i++) {
            // take all pairs that have not generated cycles so far
            List<AnnotatedArgumentPair> subList = new ArrayList<>();

            for (Integer index : addedPairsIndices) {
                subList.add(pairs.get(index));
            }

            // and add the current at the end
            subList.add(pairs.get(i));

            // what is the current lowest value of a pair weight?
            double weakestEdgeWeight = computeEdgeWeight(subList.get(subList.size() - 1), LAMBDA_PENALTY);

            // map for storing cycles by their length
            TreeMap<Integer, TreeSet<String>> lengthCyclesMap = new TreeMap<>();

            Graph graph = buildGraphFromArgumentPairs(subList);

            lastGraph = graph;

            List<List<Object>> cyclesInGraph = findCyclesInGraph(graph);

            DescriptiveStatistics transitivityScore = new DescriptiveStatistics();

            if (cyclesInGraph.isEmpty()) {
                // we have DAG
                transitivityScore = computeTransitivityScores(graph);

                // update results
                result.maxTransitivityScore = (int) transitivityScore.getMax();
                result.avgTransitivityScore = transitivityScore.getMean();
            }

            int numberOfLoops = cyclesInGraph.size();

            // initialize map
            for (int r = range.getMinimum(); r <= range.getMaximum(); r++) {
                lengthCyclesMap.put(r, new TreeSet<String>());
            }

            // we hit a loop
            if (numberOfLoops > 0) {
                // let's update the result

                if (!firstCycleAlreadyHit) {
                    result.graphSizeEdgesBeforeFirstCycle = graph.getEdgeCount();
                    result.graphSizeNodesBeforeFirstCycle = graph.getNodeCount();

                    // find the shortest cycle
                    int shortestCycleLength = Integer.MAX_VALUE;

                    for (List<Object> cycle : cyclesInGraph) {
                        shortestCycleLength = Math.min(shortestCycleLength, cycle.size());
                    }
                    result.lengthOfFirstCircle = shortestCycleLength;

                    result.pairsBeforeFirstCycle = i;

                    firstCycleAlreadyHit = true;
                }

                // ignore this edge further
                ignoredEdgesCount++;

                // update counts of different cycles lengths
                for (List<Object> cycle : cyclesInGraph) {
                    int currentSize = cycle.size();

                    // convert to sorted list of node IDs so the same cycle is counted only once
                    List<String> cycleAsSortedIDs = new ArrayList<>();
                    for (Object o : cycle) {
                        cycleAsSortedIDs.add(o.toString());
                    }
                    Collections.sort(cycleAsSortedIDs);

                    if (range.contains(currentSize)) {
                        lengthCyclesMap.get(currentSize).add(cycleAsSortedIDs.toString());
                    }
                }
            } else {
                addedPairsIndices.add(i);
            }

            // collect loop sizes
            StringBuilder loopsAsString = new StringBuilder();
            for (int j = range.getMinimum(); j <= range.getMaximum(); j++) {
                loopsAsString.append(lengthCyclesMap.get(j).size());
                loopsAsString.append("\t");
            }

            psTable.printf(Locale.ENGLISH, "%.4f\t%d\t%d\t%b\t%.2f\t%d\t%d\t%d\t%d\t%s%n", weakestEdgeWeight, i,
                    ignoredEdgesCount, numberOfLoops == 0,
                    Double.isNaN(transitivityScore.getMean()) ? 0d : transitivityScore.getMean(),
                    (int) transitivityScore.getMax(), transitivityScore.getN(), graph.getEdgeCount(),
                    graph.getNodeCount(), loopsAsString.toString().trim());

            // update result
            result.finalGraphSizeEdges = graph.getEdgeCount();
            result.finalGraphSizeNodes = graph.getNodeCount();
            result.ignoredEdgesThatBrokeDAG = ignoredEdgesCount;

            // update stats for correlation: if we keep the edge = 0, if we remove it = 1
            correlationEdgeWeights[i] = weakestEdgeWeight;
            correlationRemovedEdges[i] = numberOfLoops == 0 ? 0.0 : 1.0;
        }

        psInfo.println("Original: " + fullDataSize + ", removed by MACE: " + (fullDataSize - preFilteredDataSize)
                + ", final: " + (preFilteredDataSize - ignoredEdgesCount) + " (removed: " + ignoredEdgesCount
                + ")");

        // one row per pair: column 0 = edge weight, column 1 = removed flag
        double[][] matrix = new double[correlationEdgeWeights.length][];
        for (int i = 0; i < correlationEdgeWeights.length; i++) {
            matrix[i] = new double[2];
            matrix[i][0] = correlationEdgeWeights[i];
            matrix[i][1] = correlationRemovedEdges[i];
        }

        PearsonsCorrelation pearsonsCorrelation = new PearsonsCorrelation(matrix);

        double pValue = pearsonsCorrelation.getCorrelationPValues().getEntry(0, 1);
        double correlation = pearsonsCorrelation.getCorrelationMatrix().getEntry(0, 1);

        psInfo.printf(Locale.ENGLISH, "Correlation: %.3f, p-Value: %.4f%n", correlation, pValue);
        if (lastGraph == null) {
            throw new IllegalStateException("Graph is null");
        }
    }

    // save filtered final gold data
    List<AnnotatedArgumentPair> finalArgumentPairList = new ArrayList<>();

    for (Integer index : addedPairsIndices) {
        finalArgumentPairList.add(pairs.get(index));
    }
    XStreamTools.toXML(finalArgumentPairList, new File(outputDir, prefix + file.getName()));

    // TODO: here, we can add newly generated edges from graph transitivity
    // Boolean.TRUE.equals avoids a NullPointerException from unboxing a null flag
    if (Boolean.TRUE.equals(collectGeneratedArgumentPairs)) {
        Set<GeneratedArgumentPair> generatedArgumentPairs = new HashSet<>();
        // collect all arguments
        Map<String, Argument> allArguments = new HashMap<>();
        for (ArgumentPair argumentPair : pairs) {
            allArguments.put(argumentPair.getArg1().getId(), argumentPair.getArg1());
            allArguments.put(argumentPair.getArg2().getId(), argumentPair.getArg2());
        }

        Graph finalGraph = buildGraphFromArgumentPairs(finalArgumentPairList);
        for (Edge e : finalGraph.getEdgeSet()) {
            e.setAttribute(WEIGHT, 1.0);
        }

        for (Node j : finalGraph) {
            // shortest paths depend only on the source node, so compute them once per source
            // instead of once per (source, target) pair
            BellmanFord bfShortest = new BellmanFord(WEIGHT, j.getId());
            bfShortest.init(finalGraph);
            bfShortest.compute();

            for (Node k : finalGraph) {
                if (j != k) {
                    // is there a path between?
                    Path shortestPath = bfShortest.getShortestPath(k);

                    if (shortestPath.size() > 0) {
                        // we have a path
                        GeneratedArgumentPair ap = new GeneratedArgumentPair();
                        Argument arg1 = allArguments.get(j.getId());

                        if (arg1 == null) {
                            throw new IllegalStateException("Cannot find argument " + j.getId());
                        }
                        ap.setArg1(arg1);

                        Argument arg2 = allArguments.get(k.getId());

                        if (arg2 == null) {
                            throw new IllegalStateException("Cannot find argument " + k.getId());
                        }
                        ap.setArg2(arg2);

                        ap.setGoldLabel("a1");
                        generatedArgumentPairs.add(ap);
                    }
                }
            }
        }
        // and now add the reverse ones
        Set<GeneratedArgumentPair> generatedReversePairs = new HashSet<>();
        for (GeneratedArgumentPair pair : generatedArgumentPairs) {
            GeneratedArgumentPair ap = new GeneratedArgumentPair();
            ap.setArg1(pair.getArg2());
            ap.setArg2(pair.getArg1());
            ap.setGoldLabel("a2");
            generatedReversePairs.add(ap);
        }
        generatedArgumentPairs.addAll(generatedReversePairs);
        // and save it
        XStreamTools.toXML(generatedArgumentPairs, new File(outputDir, "generated_" + prefix + file.getName()));
    }

    result.fullPairsSize = fullDataSize;
    result.removedApriori = (fullDataSize - preFilteredDataSize);
    result.finalPairsRetained = finalArgumentPairList.size();

    // save the final graph
    Graph outGraph = cleanCopyGraph(lastGraph);
    FileSinkDGS dgs1 = new FileSinkDGS();
    File outFile = new File(outputDir, prefix + file.getName() + ".dgs");

    System.out.println("Saved to " + outFile);
    // close the writer even if serializing the graph fails
    try (FileWriter w1 = new FileWriter(outFile)) {
        dgs1.writeAll(outGraph, w1);
    }

    return result;
}

From source file:org.meteoinfo.math.stats.StatsUtil.java

/**
 * Computes the Pearson correlation coefficient of two data series together with its p-value.
 * The number of observations is taken from the first dimension of {@code x}.
 *
 * @param x X data
 * @param y Y data
 * @return two-element array holding the correlation coefficient followed by the p-value
 */
public static double[] pearsonr(Array x, Array y) {
    final int count = x.getShape()[0];

    // one row per observation: column 0 holds the x value, column 1 the y value
    final double[][] samples = new double[count][];
    for (int row = 0; row < count; row++) {
        samples[row] = new double[] { x.getDouble(row), y.getDouble(row) };
    }

    // copyArray = false: the matrix wraps the freshly built array instead of copying it
    final RealMatrix observations = new Array2DRowRealMatrix(samples, false);
    final PearsonsCorrelation pearson = new PearsonsCorrelation(observations);

    final double coefficient = pearson.getCorrelationMatrix().getEntry(0, 1);
    final double pValue = pearson.getCorrelationPValues().getEntry(0, 1);
    return new double[] { coefficient, pValue };
}

From source file:restclient.service.RecordFacadeREST.java

/**
 * Returns the Pearson correlation (and its p-value) between a user's {@code plevel} readings
 * and one weather variable over an inclusive date range.
 *
 * @param uid   user id whose records are queried
 * @param date1 start date, formatted as {@code yyyy-MM-dd}
 * @param date2 end date, formatted as {@code yyyy-MM-dd}
 * @param wv    weather variable: {@code "temperature"}, {@code "humidity"} or
 *              {@code "windspeed"}; any other value selects pressure
 * @return a list with a single {@code Correlation}, or an empty list when fewer than two
 *         records match (a correlation cannot be computed from them)
 * @throws ParseException if either date cannot be parsed
 */
@GET
@Path("findCorrelation/{uid}/{sdate}/{edate}/{wvariable}")
@Produces({ "application/json" })
public List<Correlation> findCorrelation(@PathParam("uid") Integer uid, @PathParam("sdate") String date1,
        @PathParam("edate") String date2, @PathParam("wvariable") String wv) throws ParseException {
    SimpleDateFormat sdf1 = new SimpleDateFormat("yyyy-MM-dd");
    Date sdate = sdf1.parse(date1);
    Date edate = sdf1.parse(date2);
    TypedQuery<Record> q = em.createQuery(
            "SELECT r FROM Record r WHERE r.date >= :sdate AND r.date <= :edate AND r.uid.uid = :uid order by r.date ASC",
            Record.class);
    q.setParameter("uid", uid);
    q.setParameter("sdate", sdate);
    q.setParameter("edate", edate);
    List<Record> qr = q.getResultList();
    List<Correlation> re = new ArrayList<Correlation>();

    // PearsonsCorrelation rejects input with fewer than two rows; answer with an empty result
    // instead of letting an unchecked exception escape the endpoint
    if (qr.size() < 2) {
        return re;
    }

    // one row per record: column 0 = plevel, column 1 = the selected weather variable
    double data[][] = new double[qr.size()][2];
    for (int i = 0; i < qr.size(); ++i) {
        Record r = qr.get(i);
        data[i][0] = r.getPlevel();
        if (wv.equals("temperature")) {
            data[i][1] = r.getTemp();
        } else if (wv.equals("humidity")) {
            data[i][1] = r.getHumidity();
        } else if (wv.equals("windspeed")) {
            data[i][1] = r.getWindspeed();
        } else {
            data[i][1] = r.getPressure();
        }
    }
    RealMatrix m = MatrixUtils.createRealMatrix(data);
    PearsonsCorrelation pc = new PearsonsCorrelation(m);

    Correlation cl = new Correlation();
    RealMatrix corM = pc.getCorrelationMatrix();
    cl.setRvalue(corM.getEntry(0, 1));
    RealMatrix pM = pc.getCorrelationPValues();
    cl.setSvalue(pM.getEntry(0, 1));
    re.add(cl);
    return re;
}

From source file:sanger.team16.common.stats.SimpleLinear.java

/**
 * Returns the p-value of the Pearson correlation between {@code sampleX} and {@code sampleY},
 * using their first {@code n} entries.
 *
 * @return p-value of the correlation between the two samples
 */
public double getCorrelationPValue() {
    // one row per observation: { x, y }
    final double[][] observations = new double[n][];
    for (int row = 0; row < n; row++) {
        observations[row] = new double[] { sampleX[row], sampleY[row] };
    }

    final PearsonsCorrelation pearson = new PearsonsCorrelation(observations);
    return pearson.getCorrelationPValues().getEntry(0, 1);
}

From source file:sanger.team16.common.stats.SimpleLinear.java

/**
 * Runs a permutation test: shuffles the pairing of {@code sampleY} against {@code sampleX}
 * {@code permutation} times and reports how often the resulting correlation p-value is at
 * most {@code nominalP}.
 *
 * @param permutation number of shuffles to perform
 * @param nominalP    p-value threshold each permuted p-value is compared against
 * @return fraction of permutations whose p-value is less than or equal to {@code nominalP}
 */
public double permute(int permutation, double nominalP) {
    final List<Integer> shuffled = initIndex();
    int hits = 0; // permutations with an empirical p-value <= nominalP

    for (int round = 0; round < permutation; round++) {
        Collections.shuffle(shuffled);

        // x stays in place, y is taken in the shuffled order
        final double[][] permuted = new double[n][];
        for (int row = 0; row < n; row++) {
            permuted[row] = new double[] { sampleX[row], sampleY[shuffled.get(row)] };
        }

        final double empiricalP = new PearsonsCorrelation(permuted).getCorrelationPValues().getEntry(0, 1);
        if (empiricalP <= nominalP) {
            hits++;
        }
    }

    return (double) hits / permutation;
}

From source file:sanger.team16.common.stats.SpearmansRank.java

/**
 * Returns the p-value of the Pearson correlation between {@code rankX} and {@code rankY},
 * using their first {@code n} entries.
 *
 * @return p-value of the correlation between the two rank vectors
 */
public double getCorrelationPValue() {
    // one row per observation: { rank of x, rank of y }
    final double[][] rankedPairs = new double[n][];
    for (int row = 0; row < n; row++) {
        rankedPairs[row] = new double[] { rankX[row], rankY[row] };
    }

    final PearsonsCorrelation pearson = new PearsonsCorrelation(rankedPairs);
    return pearson.getCorrelationPValues().getEntry(0, 1);
}

From source file:sanger.team16.common.stats.SpearmansRank.java

/**
 * Runs a permutation test on the rank vectors: shuffles the pairing of {@code rankY} against
 * {@code rankX} {@code permutation} times and reports how often the resulting correlation
 * p-value is at most {@code nominalP}.
 *
 * @param permutation number of shuffles to perform
 * @param nominalP    p-value threshold each permuted p-value is compared against
 * @return fraction of permutations whose p-value is less than or equal to {@code nominalP}
 */
public double permute(int permutation, double nominalP) {
    final List<Integer> shuffled = initIndex();
    int hits = 0; // permutations with an empirical p-value <= nominalP

    for (int round = 0; round < permutation; round++) {
        Collections.shuffle(shuffled);

        // x ranks stay in place, y ranks are taken in the shuffled order
        final double[][] permuted = new double[n][];
        for (int row = 0; row < n; row++) {
            permuted[row] = new double[] { rankX[row], rankY[shuffled.get(row)] };
        }

        final double empiricalP = new PearsonsCorrelation(permuted).getCorrelationPValues().getEntry(0, 1);
        if (empiricalP <= nominalP) {
            hits++;
        }
    }

    return (double) hits / permutation;
}

From source file:stats.SpearmansCorrelation.java

/**
 * Create a SpearmansCorrelation with the given input data matrix and ranking
 * algorithm.
 * <p>
 * The input is rank-transformed and a {@link PearsonsCorrelation} is then built on the
 * ranked data.
 * <p>
 * From version 4.0 onwards this constructor will throw an exception if the
 * provided {@link NaturalRanking} uses a {@link NaNStrategy#REMOVED}
 * strategy.
 *
 * @param dataMatrix
 *          matrix of data with columns representing variables to correlate
 * @param rankingAlgorithm
 *          ranking algorithm
 */
public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm) {
    // assigned before rankTransform runs - presumably the transform reads this field
    this.rankingAlgorithm = rankingAlgorithm;
    this.data = rankTransform(dataMatrix);
    this.rankCorrelation = new PearsonsCorrelation(this.data);
}