Example usage for org.apache.commons.math3.stat.correlation PearsonsCorrelation getCorrelationPValues

List of usage examples for org.apache.commons.math3.stat.correlation PearsonsCorrelation getCorrelationPValues

Introduction

In this page you can find the example usage for org.apache.commons.math3.stat.correlation PearsonsCorrelation getCorrelationPValues.

Prototype

public RealMatrix getCorrelationPValues() 

Source Link

Document

Returns a matrix of p-values associated with the (two-sided) null hypothesis that the corresponding correlation coefficient is zero.

Usage

From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step6GraphTransitivityCleaner.java

public GraphCleaningResults processSingleFile(File file, File outputDir, String prefix,
        Boolean collectGeneratedArgumentPairs) throws Exception {
    GraphCleaningResults result = new GraphCleaningResults();

    File outFileTable = new File(outputDir, prefix + file.getName() + "_table.csv");
    File outFileInfo = new File(outputDir, prefix + file.getName() + "_info.txt");

    PrintStream psTable = new PrintStream(new FileOutputStream(outFileTable));
    PrintStream psInfo = new PrintStream(new FileOutputStream(outFileInfo));

    // load one topic/side
    List<AnnotatedArgumentPair> pairs = new ArrayList<>(
            (List<AnnotatedArgumentPair>) XStreamTools.getXStream().fromXML(file));

    int fullDataSize = pairs.size();

    // filter out missing gold data
    Iterator<AnnotatedArgumentPair> iterator = pairs.iterator();
    while (iterator.hasNext()) {
        AnnotatedArgumentPair pair = iterator.next();
        if (pair.getGoldLabel() == null) {
            iterator.remove();//from   ww  w.  j a v  a2  s.  c o  m
        }
        // or we want to completely remove equal edges in advance!
        else if (this.removeEqualEdgesParam && "equal".equals(pair.getGoldLabel())) {
            iterator.remove();
        }
    }

    // sort pairs by their weight
    this.argumentPairListSorter.sortArgumentPairs(pairs);

    int preFilteredDataSize = pairs.size();

    // compute correlation between score threshold and number of removed edges
    double[] correlationEdgeWeights = new double[pairs.size()];
    double[] correlationRemovedEdges = new double[pairs.size()];

    // only cycles of length 0 to 5 are interesting (5+ are too big)
    Range<Integer> range = Range.between(0, 5);

    psTable.print(
            "EdgeWeightThreshold\tPairs\tignoredEdgesCount\tIsDAG\tTransitivityScoreMean\tTransitivityScoreMax\tTransitivityScoreSamples\tEdges\tNodes\t");
    for (int j = range.getMinimum(); j <= range.getMaximum(); j++) {
        psTable.print("Cycles_" + j + "\t");
    }
    psTable.println();

    // store the indices of all pairs (edges) that have been successfully added without
    // generating cycles
    TreeSet<Integer> addedPairsIndices = new TreeSet<>();

    // number of edges ignored as they generated cycles
    int ignoredEdgesCount = 0;

    Graph lastGraph = null;

    // flag that the first cycle was already processed
    boolean firstCycleAlreadyHit = false;

    for (int i = 1; i < pairs.size(); i++) {
        // now filter the finalArgumentPairList and add only pairs that have not generated cycles
        List<AnnotatedArgumentPair> subList = new ArrayList<>();

        for (Integer index : addedPairsIndices) {
            subList.add(pairs.get(index));
        }

        // and add the current at the end
        subList.add(pairs.get(i));

        // what is the current lowest value of a pair weight?
        double weakestEdgeWeight = computeEdgeWeight(subList.get(subList.size() - 1), LAMBDA_PENALTY);

        //            Graph graph = buildGraphFromArgumentPairs(finalArgumentPairList);
        int numberOfLoops;

        // map for storing cycles by their length
        TreeMap<Integer, TreeSet<String>> lengthCyclesMap = new TreeMap<>();

        Graph graph = buildGraphFromArgumentPairs(subList);

        lastGraph = graph;

        List<List<Object>> cyclesInGraph = findCyclesInGraph(graph);

        DescriptiveStatistics transitivityScore = new DescriptiveStatistics();

        if (cyclesInGraph.isEmpty()) {
            // we have DAG
            transitivityScore = computeTransitivityScores(graph);

            // update results
            result.maxTransitivityScore = (int) transitivityScore.getMax();
            result.avgTransitivityScore = transitivityScore.getMean();
        }

        numberOfLoops = cyclesInGraph.size();

        // initialize map
        for (int r = range.getMinimum(); r <= range.getMaximum(); r++) {
            lengthCyclesMap.put(r, new TreeSet<String>());
        }

        // we hit a loop
        if (numberOfLoops > 0) {
            // let's update the result

            if (!firstCycleAlreadyHit) {
                result.graphSizeEdgesBeforeFirstCycle = graph.getEdgeCount();
                result.graphSizeNodesBeforeFirstCycle = graph.getNodeCount();

                // find the shortest cycle
                int shortestCycleLength = Integer.MAX_VALUE;

                for (List<Object> cycle : cyclesInGraph) {
                    shortestCycleLength = Math.min(shortestCycleLength, cycle.size());
                }
                result.lengthOfFirstCircle = shortestCycleLength;

                result.pairsBeforeFirstCycle = i;

                firstCycleAlreadyHit = true;
            }

            // ignore this edge further
            ignoredEdgesCount++;

            // update counts of different cycles lengths
            for (List<Object> cycle : cyclesInGraph) {
                int currentSize = cycle.size();

                // convert to sorted set of nodes
                List<String> cycleAsSortedIDs = new ArrayList<>();
                for (Object o : cycle) {
                    cycleAsSortedIDs.add(o.toString());
                }
                Collections.sort(cycleAsSortedIDs);

                if (range.contains(currentSize)) {
                    lengthCyclesMap.get(currentSize).add(cycleAsSortedIDs.toString());
                }
            }
        } else {
            addedPairsIndices.add(i);
        }

        // we hit the first cycle

        // collect loop sizes
        StringBuilder loopsAsString = new StringBuilder();
        for (int j = range.getMinimum(); j <= range.getMaximum(); j++) {
            //                    loopsAsString.append(j).append(":");
            loopsAsString.append(lengthCyclesMap.get(j).size());
            loopsAsString.append("\t");
        }

        psTable.printf(Locale.ENGLISH, "%.4f\t%d\t%d\t%b\t%.2f\t%d\t%d\t%d\t%d\t%s%n", weakestEdgeWeight, i,
                ignoredEdgesCount, numberOfLoops == 0,
                Double.isNaN(transitivityScore.getMean()) ? 0d : transitivityScore.getMean(),
                (int) transitivityScore.getMax(), transitivityScore.getN(), graph.getEdgeCount(),
                graph.getNodeCount(), loopsAsString.toString().trim());

        // update result
        result.finalGraphSizeEdges = graph.getEdgeCount();
        result.finalGraphSizeNodes = graph.getNodeCount();
        result.ignoredEdgesThatBrokeDAG = ignoredEdgesCount;

        // update stats for correlation
        correlationEdgeWeights[i] = weakestEdgeWeight;
        //            correlationRemovedEdges[i] =  (double) ignoredEdgesCount;
        // let's try: if we keep = 0, if we remove = 1
        correlationRemovedEdges[i] = numberOfLoops == 0 ? 0.0 : 1.0;
    }

    psInfo.println("Original: " + fullDataSize + ", removed by MACE: " + (fullDataSize - preFilteredDataSize)
            + ", final: " + (preFilteredDataSize - ignoredEdgesCount) + " (removed: " + ignoredEdgesCount
            + ")");

    double[][] matrix = new double[correlationEdgeWeights.length][];
    for (int i = 0; i < correlationEdgeWeights.length; i++) {
        matrix[i] = new double[2];
        matrix[i][0] = correlationEdgeWeights[i];
        matrix[i][1] = correlationRemovedEdges[i];
    }

    PearsonsCorrelation pearsonsCorrelation = new PearsonsCorrelation(matrix);

    double pValue = pearsonsCorrelation.getCorrelationPValues().getEntry(0, 1);
    double correlation = pearsonsCorrelation.getCorrelationMatrix().getEntry(0, 1);

    psInfo.printf(Locale.ENGLISH, "Correlation: %.3f, p-Value: %.4f%n", correlation, pValue);
    if (lastGraph == null) {
        throw new IllegalStateException("Graph is null");
    }

    // close
    psInfo.close();
    psTable.close();

    // save filtered final gold data
    List<AnnotatedArgumentPair> finalArgumentPairList = new ArrayList<>();

    for (Integer index : addedPairsIndices) {
        finalArgumentPairList.add(pairs.get(index));
    }
    XStreamTools.toXML(finalArgumentPairList, new File(outputDir, prefix + file.getName()));

    // TODO: here, we can add newly generated edges from graph transitivity
    if (collectGeneratedArgumentPairs) {
        Set<GeneratedArgumentPair> generatedArgumentPairs = new HashSet<>();
        // collect all arguments
        Map<String, Argument> allArguments = new HashMap<>();
        for (ArgumentPair argumentPair : pairs) {
            allArguments.put(argumentPair.getArg1().getId(), argumentPair.getArg1());
            allArguments.put(argumentPair.getArg2().getId(), argumentPair.getArg2());
        }

        Graph finalGraph = buildGraphFromArgumentPairs(finalArgumentPairList);
        for (Edge e : finalGraph.getEdgeSet()) {
            e.setAttribute(WEIGHT, 1.0);
        }

        for (Node j : finalGraph) {
            for (Node k : finalGraph) {
                if (j != k) {
                    // is there a path between?
                    BellmanFord bfShortest = new BellmanFord(WEIGHT, j.getId());
                    bfShortest.init(finalGraph);
                    bfShortest.compute();

                    Path shortestPath = bfShortest.getShortestPath(k);

                    if (shortestPath.size() > 0) {
                        // we have a path
                        GeneratedArgumentPair ap = new GeneratedArgumentPair();
                        Argument arg1 = allArguments.get(j.getId());

                        if (arg1 == null) {
                            throw new IllegalStateException("Cannot find argument " + j.getId());
                        }
                        ap.setArg1(arg1);

                        Argument arg2 = allArguments.get(k.getId());

                        if (arg2 == null) {
                            throw new IllegalStateException("Cannot find argument " + k.getId());
                        }
                        ap.setArg2(arg2);

                        ap.setGoldLabel("a1");
                        generatedArgumentPairs.add(ap);
                    }
                }
            }
        }
        // and now add the reverse ones
        Set<GeneratedArgumentPair> generatedReversePairs = new HashSet<>();
        for (GeneratedArgumentPair pair : generatedArgumentPairs) {
            GeneratedArgumentPair ap = new GeneratedArgumentPair();
            ap.setArg1(pair.getArg2());
            ap.setArg2(pair.getArg1());
            ap.setGoldLabel("a2");
            generatedReversePairs.add(ap);
        }
        generatedArgumentPairs.addAll(generatedReversePairs);
        // and save it
        XStreamTools.toXML(generatedArgumentPairs, new File(outputDir, "generated_" + prefix + file.getName()));
    }

    result.fullPairsSize = fullDataSize;
    result.removedApriori = (fullDataSize - preFilteredDataSize);
    result.finalPairsRetained = finalArgumentPairList.size();

    // save the final graph
    Graph outGraph = cleanCopyGraph(lastGraph);
    FileSinkDGS dgs1 = new FileSinkDGS();
    File outFile = new File(outputDir, prefix + file.getName() + ".dgs");

    System.out.println("Saved to " + outFile);
    FileWriter w1 = new FileWriter(outFile);

    dgs1.writeAll(outGraph, w1);
    w1.close();

    return result;
}

From source file:org.apache.solr.client.solrj.io.eval.CorrelationSignificanceEvaluator.java

@Override
public Object doWork(Object value) throws IOException {
    if (null == value) {
        return null;
    } else if (value instanceof Matrix) {
        Matrix matrix = (Matrix) value;//  w ww .j av a 2 s  .com
        Object corr = matrix.getContextValue("corr");
        if (corr instanceof PearsonsCorrelation) {
            PearsonsCorrelation pcorr = (PearsonsCorrelation) corr;
            RealMatrix realMatrix = pcorr.getCorrelationPValues();
            return new Matrix(realMatrix.getData());
        } else {
            throw new IOException(
                    "Correlation pvalues are only available for Pearsons and Spearmans correlations");
        }
    } else {
        throw new IOException("matrix parameter expected for transpose function");
    }
}

From source file:org.meteoinfo.math.stats.StatsUtil.java

/**
 * Calculates a Pearson correlation coefficient.
 *
 * @param x X data/*from w  w  w  .  j  a v a2  s.c o  m*/
 * @param y Y data
 * @return Pearson correlation and p-value.
 */
public static double[] pearsonr(Array x, Array y) {
    int m = x.getShape()[0];
    int n = 1;
    double[][] aa = new double[m][n * 2];
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n * 2; j++) {
            if (j < n) {
                aa[i][j] = x.getDouble(i * n + j);
            } else {
                aa[i][j] = y.getDouble(i * n + j - n);
            }
        }
    }
    RealMatrix matrix = new Array2DRowRealMatrix(aa, false);
    PearsonsCorrelation pc = new PearsonsCorrelation(matrix);
    double r = pc.getCorrelationMatrix().getEntry(0, 1);
    double pvalue = pc.getCorrelationPValues().getEntry(0, 1);
    return new double[] { r, pvalue };
}

From source file:restclient.service.DailyRecordFacadeREST.java

@GET
@Path("findCorrelationByStartAndEndDateAndWeatherVariable/{startDate}/{endDate}/{attribute}")
@Produces({ "application/json" })
public String findCorrelationByStartAndEndDateAndWeatherVariable(@PathParam("startDate") Date startDate,
        @PathParam("endDate") Date endDate, @PathParam("attribute") String attribute) {
    String attributeInLowerCase = attribute.toLowerCase();
    switch (attributeInLowerCase) {
    case "windspeed":
    case "wind speed":
        attributeInLowerCase = "windSpeed";
        break;//from  w ww. j av a 2s. co m
    case "atmosphericpressure":
    case "atmospheric pressure":
        attributeInLowerCase = "atmosphericPressure";
        break;
    default:
        break;
    }
    String sth = "NEW restclient.Result2(d.painLevel, d." + attributeInLowerCase + ")";
    String jpql = "SELECT " + sth
            + " FROM restclient.DailyRecord d WHERE d.recordDate >= :startDate AND d.recordDate <= :endDate";
    TypedQuery<Result2> q = em.createQuery(jpql, Result2.class);
    q.setParameter("startDate", startDate);
    q.setParameter("endDate", endDate);
    List<Result2> result = q.getResultList();
    double data[][] = new double[result.size()][];
    for (int i = 0; i < result.size(); i++) {
        data[i] = new double[] { result.get(i).painLevel, result.get(i).weather };
    }
    RealMatrix m = MatrixUtils.createRealMatrix(data);
    String first = "";
    for (int i = 0; i < m.getColumnDimension(); i++)
        for (int j = 0; j < m.getColumnDimension(); j++) {
            PearsonsCorrelation pc = new PearsonsCorrelation();
            double cor = pc.correlation(m.getColumn(i), m.getColumn(j));
            first += (i + "," + j + "=[" + String.format(".%2f", cor) + "," + "]" + ";   ");
        }
    PearsonsCorrelation pc = new PearsonsCorrelation(m);
    RealMatrix corM = pc.getCorrelationMatrix();
    String second = ("!correlation:" + corM.getEntry(0, 1) + "   ");
    RealMatrix pM = pc.getCorrelationPValues();
    String third = ("!p value:" + pM.getEntry(0, 1));
    return first + second + third;
}

From source file:restclient.service.RecordFacadeREST.java

@GET
@Path("findCorrelation/{uid}/{sdate}/{edate}/{wvariable}")
@Produces({ "application/json" })
public List<Correlation> findCorrelation(@PathParam("uid") Integer uid, @PathParam("sdate") String date1,
        @PathParam("edate") String date2, @PathParam("wvariable") String wv) throws ParseException {
    SimpleDateFormat sdf1 = new SimpleDateFormat("yyyy-MM-dd");
    SimpleDateFormat sdf2 = new SimpleDateFormat("dd/MM/yyyy");
    Date sdate = sdf1.parse(date1);
    Date edate = sdf1.parse(date2);
    TypedQuery<Record> q = em.createQuery(
            "SELECT r FROM Record r WHERE r.date >= :sdate AND r.date <= :edate AND r.uid.uid = :uid order by r.date ASC",
            Record.class);
    q.setParameter("uid", uid);
    q.setParameter("sdate", sdate);
    q.setParameter("edate", edate);
    List<Record> qr = q.getResultList();
    List<Correlation> re = new ArrayList<Correlation>();
    Correlation cl = new Correlation();
    double data[][] = new double[qr.size()][2];
    for (int i = 0; i < qr.size(); ++i) {
        Record r = qr.get(i);//  w  w w .j  av  a  2 s.  c om
        data[i][0] = r.getPlevel();
        if (wv.equals("temperature")) {
            data[i][1] = r.getTemp();
        } else if (wv.equals("humidity")) {
            data[i][1] = r.getHumidity();
        } else if (wv.equals("windspeed")) {
            data[i][1] = r.getWindspeed();
        } else {
            data[i][1] = r.getPressure();
        }
    }
    RealMatrix m = MatrixUtils.createRealMatrix(data);
    PearsonsCorrelation pc = new PearsonsCorrelation(m);
    RealMatrix corM = pc.getCorrelationMatrix();
    cl.setRvalue(corM.getEntry(0, 1));
    RealMatrix pM = pc.getCorrelationPValues();
    cl.setSvalue(pM.getEntry(0, 1));
    re.add(cl);
    return re;
}