Example usage for org.apache.commons.math.stat.descriptive DescriptiveStatistics getMean

List of usage examples for org.apache.commons.math.stat.descriptive DescriptiveStatistics getMean

Introduction

On this page you can find example usages of org.apache.commons.math.stat.descriptive DescriptiveStatistics getMean.

Prototype

public double getMean() 

Source Link

Document

Returns the arithmetic mean (see http://www.xycoon.com/arithmetic_mean.htm) of the available values.

Usage

From source file:com.joliciel.jochre.graphics.SourceImageImpl.java

/**
 * Returns the white areas separating the columns of this image, computing them on the
 * first call and caching the result in {@code columnSeparators}.
 * <p>
 * The computation proceeds in four stages:
 * <ol>
 * <li>Build a per-row ink histogram ({@code horizontalCounts}) from the slope-adjusted
 * shapes and split it into "empty" horizontal ranges.</li>
 * <li>Use the tall empty ranges near the top and bottom of the page to pick
 * {@code mainTextTop} and {@code mainTextBottom}.</li>
 * <li>Build a per-column ink histogram ({@code verticalCounts}) restricted to shapes whose
 * slope-adjusted top lies between those two limits, and split it into "empty" vertical
 * ranges.</li>
 * <li>Keep the vertical ranges at least one mean x-height wide, narrow each one to its
 * emptiest sub-range, and wrap it in a {@code WhiteArea}.</li>
 * </ol>
 *
 * @return the cached list of column separators (possibly empty)
 */
public List<Rectangle> findColumnSeparators() {
    if (columnSeparators == null) {
        LOG.debug("############ findColumnSeparators ##############");
        double slope = this.getMeanHorizontalSlope();

        double imageMidPointX = (double) this.getWidth() / 2.0;

        // horizontalCounts[y] = total width of all shapes covering (slope-adjusted) row y
        int[] horizontalCounts = new int[this.getHeight()];
        DescriptiveStatistics rowXHeightStats = new DescriptiveStatistics();
        // first get the fill factor for each horizontal row in the image
        for (RowOfShapes row : this.getRows()) {
            rowXHeightStats.addValue(row.getXHeight());
            for (Shape shape : row.getShapes()) {
                double shapeMidPointX = (double) (shape.getLeft() + shape.getRight()) / 2.0;
                // shift the shape's top by the page slope, measured from the image's horizontal centre
                int slopeAdjustedTop = (int) Math
                        .round(shape.getTop() + (slope * (shapeMidPointX - imageMidPointX)));
                if (slopeAdjustedTop >= 0 && slopeAdjustedTop < this.getHeight()) {
                    for (int i = 0; i < shape.getHeight(); i++) {
                        // rows falling below the bottom of the image are silently dropped
                        if (slopeAdjustedTop + i < horizontalCounts.length)
                            horizontalCounts[slopeAdjustedTop + i] += shape.getWidth();
                    }
                }
            }
        }
        // statistics over all rows, and over only those rows containing some ink
        DescriptiveStatistics horizontalStats = new DescriptiveStatistics();
        DescriptiveStatistics horizontalStatsNonEmpty = new DescriptiveStatistics();
        for (int i = 0; i < this.getHeight(); i++) {
            //         LOG.trace("Row " + i + ": " + horizontalCounts[i]);
            horizontalStats.addValue(horizontalCounts[i]);
            if (horizontalCounts[i] > 0)
                horizontalStatsNonEmpty.addValue(horizontalCounts[i]);
        }
        LOG.debug("Mean horizontal count: " + horizontalStats.getMean());
        LOG.debug("Median horizontal count: " + horizontalStats.getPercentile(50));
        LOG.debug("25 percentile horizontal count: " + horizontalStats.getPercentile(25));
        LOG.debug("Mean horizontal count (non empty): " + horizontalStatsNonEmpty.getMean());
        LOG.debug("Median horizontal count (non empty): " + horizontalStatsNonEmpty.getPercentile(50));
        LOG.debug("25 percentile horizontal count (non empty): " + horizontalStatsNonEmpty.getPercentile(25));
        LOG.debug("10 percentile horizontal count (non empty): " + horizontalStatsNonEmpty.getPercentile(10));

        // a row counts as "empty" when it holds at most 1/8 of the mean ink of the non-empty rows
        double maxEmptyRowCount = horizontalStatsNonEmpty.getMean() / 8.0;
        LOG.debug("maxEmptyRowCount: " + maxEmptyRowCount);

        // collect maximal runs of empty rows as {start, end} pairs.
        // A run closed by a non-empty row ends at that row's index, whereas a run
        // reaching the bottom of the image ends at the last row index.
        boolean inEmptyHorizontalRange = false;
        List<int[]> emptyHorizontalRanges = new ArrayList<int[]>();
        int emptyHorizontalRangeStart = 0;
        for (int i = 0; i < this.getHeight(); i++) {
            if (!inEmptyHorizontalRange && horizontalCounts[i] <= maxEmptyRowCount) {
                inEmptyHorizontalRange = true;
                emptyHorizontalRangeStart = i;
            } else if (inEmptyHorizontalRange && horizontalCounts[i] > maxEmptyRowCount) {
                inEmptyHorizontalRange = false;
                emptyHorizontalRanges.add(new int[] { emptyHorizontalRangeStart, i });
            }
        }
        if (inEmptyHorizontalRange) {
            emptyHorizontalRanges.add(new int[] { emptyHorizontalRangeStart, this.getHeight() - 1 });
        }

        LOG.debug("rowXHeight mean: " + rowXHeightStats.getMean());
        LOG.debug("rowXHeight median: " + rowXHeightStats.getPercentile(50));
        // an empty range taller than twice the mean x-height is a "big" break, otherwise a "small" one
        double minHorizontalBreak = rowXHeightStats.getMean() * 2.0;
        LOG.debug("minHorizontalBreak: " + minHorizontalBreak);
        int smallBreakCount = 0;
        int mainTextTop = 0;
        int bigBreakCount = 0;
        // scan from the top: mainTextTop moves to the end of each big break for as long as
        // fewer than two big breaks and fewer than two small breaks have been seen
        for (int[] emptyHorizontalRange : emptyHorizontalRanges) {
            int height = emptyHorizontalRange[1] - emptyHorizontalRange[0];
            LOG.trace("empty range: " + emptyHorizontalRange[0] + ", " + emptyHorizontalRange[1] + " = "
                    + height);
            if (bigBreakCount < 2 && smallBreakCount < 2 && height > minHorizontalBreak) {
                mainTextTop = emptyHorizontalRange[1];
                bigBreakCount++;
            }
            if (height <= minHorizontalBreak)
                smallBreakCount++;
        }

        LOG.debug("mainTextTop:" + mainTextTop);
        // lift mainTextTop upwards till we reach a zero row.
        // NOTE(review): the limit computed below is half the mean x-height, not a full
        // x-height as the original comment suggested - confirm which is intended.
        int minTop = mainTextTop - (int) (rowXHeightStats.getMean() / 2.0);
        if (minTop < 0)
            minTop = 0;
        for (int i = mainTextTop; i > minTop; i--) {
            mainTextTop = i;
            if (horizontalCounts[i] == 0) {
                break;
            }
        }
        LOG.debug("mainTextTop (adjusted):" + mainTextTop);

        // now the same from the bottom: scan the empty ranges in reverse and stop once
        // more than two breaks (big and small combined) have been seen
        smallBreakCount = 0;
        bigBreakCount = 0;
        int mainTextBottom = this.getHeight();
        for (int i = emptyHorizontalRanges.size() - 1; i >= 0; i--) {
            int[] emptyHorizontalRange = emptyHorizontalRanges.get(i);
            int height = emptyHorizontalRange[1] - emptyHorizontalRange[0];
            LOG.trace("emptyHorizontalRange: " + emptyHorizontalRange[0] + ", height: " + height
                    + ", bigBreakCount: " + bigBreakCount + ", smallBreakCount: " + smallBreakCount);
            if ((bigBreakCount + smallBreakCount) <= 2 && height > minHorizontalBreak) {
                mainTextBottom = emptyHorizontalRange[0];
                LOG.trace("Set mainTextBottom to " + mainTextBottom);
                bigBreakCount++;
            }
            if (height <= minHorizontalBreak)
                smallBreakCount++;
            if ((bigBreakCount + smallBreakCount) > 2)
                break;
        }
        LOG.debug("mainTextBottom:" + mainTextBottom);
        // lower mainTextBottom downwards till we reach a zero row
        // (the limit is again half the mean x-height, capped at the image height)
        int maxBottom = mainTextBottom + (int) (rowXHeightStats.getMean() / 2.0);
        if (maxBottom > this.getHeight())
            maxBottom = this.getHeight();
        for (int i = mainTextBottom; i < maxBottom; i++) {
            mainTextBottom = i;
            if (horizontalCounts[i] == 0) {
                break;
            }
        }
        LOG.debug("mainTextBottom (adjusted):" + mainTextBottom);

        // verticalCounts[x] = total height of all main-text shapes covering (adjusted) column x
        int[] verticalCounts = new int[this.getWidth()];
        // now get the fill factor for each vertical column, using only the shapes whose
        // slope-adjusted top lies between mainTextTop and mainTextBottom
        for (RowOfShapes row : this.getRows()) {
            for (Shape shape : row.getShapes()) {
                int slopeAdjustedLeft = (int) Math.round(shape.getLeft() - row.getXAdjustment());
                double shapeMidPointX = (double) (shape.getLeft() + shape.getRight()) / 2.0;
                int slopeAdjustedTop = (int) Math
                        .round(shape.getTop() + (slope * (shapeMidPointX - imageMidPointX)));
                if (slopeAdjustedTop >= mainTextTop && slopeAdjustedTop <= mainTextBottom
                        && slopeAdjustedLeft >= 0 && slopeAdjustedLeft < this.getWidth()) {
                    for (int i = 0; i < shape.getWidth(); i++) {
                        // columns falling beyond the right edge of the image are silently dropped
                        if (slopeAdjustedLeft + i < this.getWidth())
                            verticalCounts[slopeAdjustedLeft + i] += shape.getHeight();
                    }
                }
            }
        }

        // statistics over all columns, and over only those columns containing some ink
        DescriptiveStatistics verticalStats = new DescriptiveStatistics();
        DescriptiveStatistics verticalStatsNonEmpty = new DescriptiveStatistics();
        for (int i = 0; i < this.getWidth(); i++) {
            //         LOG.trace("Column " + i + ": " + verticalCounts[i]);
            verticalStats.addValue(verticalCounts[i]);
            if (verticalCounts[i] > 0)
                verticalStatsNonEmpty.addValue(verticalCounts[i]);
        }
        LOG.debug("Mean vertical count: " + verticalStats.getMean());
        LOG.debug("Median vertical count: " + verticalStats.getPercentile(50));
        LOG.debug("25 percentile vertical count: " + verticalStats.getPercentile(25));
        LOG.debug("Mean vertical count (non empty): " + verticalStatsNonEmpty.getMean());
        LOG.debug("Median vertical count (non empty): " + verticalStatsNonEmpty.getPercentile(50));
        LOG.debug("25 percentile vertical count (non empty): " + verticalStatsNonEmpty.getPercentile(25));
        LOG.debug("10 percentile vertical count (non empty): " + verticalStatsNonEmpty.getPercentile(10));
        LOG.debug("1 percentile vertical count (non empty): " + verticalStatsNonEmpty.getPercentile(1));

        // a column counts as "empty" when it holds no more ink than the 1st percentile of
        // the non-empty columns (the mean / 8 rule used for rows is kept below for reference)
        //         double maxEmptyColumnCount = verticalStatsNonEmpty.getMean() / 8.0;
        double maxEmptyColumnCount = verticalStatsNonEmpty.getPercentile(1);
        LOG.debug("maxEmptyColumnCount: " + maxEmptyColumnCount);

        // collect maximal runs of empty columns as {start, end} pairs, exactly as for rows above
        boolean inEmptyVerticalRange = false;
        List<int[]> emptyVerticalRanges = new ArrayList<int[]>();
        int emptyVerticalRangeStart = 0;
        for (int i = 0; i < this.getWidth(); i++) {
            if (!inEmptyVerticalRange && verticalCounts[i] <= maxEmptyColumnCount) {
                inEmptyVerticalRange = true;
                emptyVerticalRangeStart = i;
            } else if (inEmptyVerticalRange && verticalCounts[i] > maxEmptyColumnCount) {
                inEmptyVerticalRange = false;
                emptyVerticalRanges.add(new int[] { emptyVerticalRangeStart, i });
            }
        }
        if (inEmptyVerticalRange) {
            emptyVerticalRanges.add(new int[] { emptyVerticalRangeStart, this.getWidth() - 1 });
        }

        LOG.debug("rowXHeight mean: " + rowXHeightStats.getMean());
        LOG.debug("rowXHeight median: " + rowXHeightStats.getPercentile(50));
        // an empty vertical range must be at least one mean x-height wide to be a column break
        double minVerticalBreak = rowXHeightStats.getMean() * 1.0;
        LOG.debug("minVerticalBreak: " + minVerticalBreak);

        List<int[]> columnBreaks = new ArrayList<int[]>();
        for (int[] emptyVerticalRange : emptyVerticalRanges) {
            int width = emptyVerticalRange[1] - emptyVerticalRange[0];
            LOG.trace("empty range: " + emptyVerticalRange[0] + ", " + emptyVerticalRange[1] + " = " + width);

            if (width >= minVerticalBreak) {
                columnBreaks.add(emptyVerticalRange);
                LOG.trace("Found column break!");
            }
        }

        columnSeparators = new ArrayList<Rectangle>();
        for (int[] columnBreak : columnBreaks) {
            // reduce the column break to the thickest empty area if possible:
            // re-scan the break with a threshold starting at 0 and rising in steps of
            // originalCount / 8, stopping at the first threshold that yields any empty sub-range
            int[] bestColumnBreak = null;
            // NOTE(review): maxEmptyColumnCount is overwritten here and never restored, so
            // from the second column break onwards originalCount is whatever value the
            // previous iteration's while loop left behind, not the 1st-percentile
            // threshold computed above - confirm this is intended.
            double originalCount = maxEmptyColumnCount;
            maxEmptyColumnCount = 0;
            while (bestColumnBreak == null && maxEmptyColumnCount <= originalCount) {
                // the range-tracking locals from the full-width scan are reused for this sub-scan
                inEmptyVerticalRange = false;
                emptyVerticalRanges = new ArrayList<int[]>();
                emptyVerticalRangeStart = columnBreak[0];
                for (int i = columnBreak[0]; i <= columnBreak[1]; i++) {
                    if (!inEmptyVerticalRange && verticalCounts[i] <= maxEmptyColumnCount) {
                        inEmptyVerticalRange = true;
                        emptyVerticalRangeStart = i;
                    } else if (inEmptyVerticalRange && verticalCounts[i] > maxEmptyColumnCount) {
                        inEmptyVerticalRange = false;
                        emptyVerticalRanges.add(new int[] { emptyVerticalRangeStart, i });
                    }
                }
                if (inEmptyVerticalRange) {
                    emptyVerticalRanges.add(new int[] { emptyVerticalRangeStart, columnBreak[1] });
                }

                // keep the widest sub-range found at this threshold (first one wins on ties)
                for (int[] emptyVerticalRange : emptyVerticalRanges) {
                    if (bestColumnBreak == null || (emptyVerticalRange[1]
                            - emptyVerticalRange[0] > bestColumnBreak[1] - bestColumnBreak[0]))
                        bestColumnBreak = emptyVerticalRange;
                }
                maxEmptyColumnCount += (originalCount / 8.0);
            }

            // no narrower sub-range was found: fall back to the full break
            if (bestColumnBreak == null)
                bestColumnBreak = columnBreak;

            // the separator spans the chosen columns over the main text's vertical extent
            Rectangle whiteArea = new WhiteArea(bestColumnBreak[0], mainTextTop, bestColumnBreak[1],
                    mainTextBottom);
            columnSeparators.add(whiteArea);
            LOG.debug("ColumnBreak: " + whiteArea);
        } // next column break
    }
    return columnSeparators;
}

From source file:guineu.modules.dataanalysis.foldChanges.FoldTestTask.java

/**
 * Computes the fold change of one dataset row between the first two groups defined by
 * {@code parameter}: the mean peak value of the first group divided by the mean peak
 * value of the second group.
 * <p>
 * Any exception raised while collecting the values is printed and collection stops;
 * the ratio is then computed from whatever was gathered up to that point.
 * (An earlier median-based ratio was disabled in favour of the mean.)
 *
 * @param mol index of the dataset row to evaluate
 * @return the ratio of the two group means, {@code -1} when the parameter has fewer than
 *         two available values, or {@code 0} when either group ended up empty
 */
public double Foldtest(int mol) throws IllegalArgumentException, MathException {
    DescriptiveStatistics firstGroup = new DescriptiveStatistics();
    DescriptiveStatistics secondGroup = new DescriptiveStatistics();

    try {
        // Determine groups for selected raw data files
        List<String> groupValues = dataset.getParameterAvailableValues(parameter);
        if (groupValues.size() <= 1) {
            return -1;
        }
        String firstValue = groupValues.get(0);
        String secondValue = groupValues.get(1);

        for (String sampleName : dataset.getAllColumnNames()) {
            // pick the group this sample belongs to; samples of any other group are skipped
            DescriptiveStatistics target;
            if (dataset.getParametersValue(sampleName, parameter) != null
                    && dataset.getParametersValue(sampleName, parameter).equals(firstValue)) {
                target = firstGroup;
            } else if (dataset.getParametersValue(sampleName, parameter) != null
                    && dataset.getParametersValue(sampleName, parameter).equals(secondValue)) {
                target = secondGroup;
            } else {
                continue;
            }
            target.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    boolean bothGroupsPopulated = firstGroup.getN() > 0 && secondGroup.getN() > 0;
    return bothGroupsPopulated ? firstGroup.getMean() / secondGroup.getMean() : 0;
}

From source file:guineu.modules.dataanalysis.Ttest.TTestTask.java

/**
 * Runs a two-sample t-test on one dataset row between the first two groups defined by
 * {@code parameter}.
 * <p>
 * Peak values that cannot be read or converted are skipped individually; any other
 * exception raised while collecting values silently ends the collection, and the test
 * is run on whatever was gathered.
 *
 * @param mol index of the dataset row to evaluate
 * @return a three-element array {@code {tTest result, mean of group 1, mean of group 2}},
 *         or {@code null} when the parameter has fewer than two available values
 */
public double[] Ttest(int mol) throws IllegalArgumentException, MathException {
    DescriptiveStatistics firstGroup = new DescriptiveStatistics();
    DescriptiveStatistics secondGroup = new DescriptiveStatistics();

    try {
        // Determine groups for selected raw data files
        List<String> groupValues = dataset.getParameterAvailableValues(parameter);
        if (groupValues.size() <= 1) {
            return null;
        }
        String firstValue = groupValues.get(0);
        String secondValue = groupValues.get(1);

        for (String sampleName : dataset.getAllColumnNames()) {
            // pick the group this sample belongs to; samples of any other group are skipped
            DescriptiveStatistics target;
            if (dataset.getParametersValue(sampleName, parameter) != null
                    && dataset.getParametersValue(sampleName, parameter).equals(firstValue)) {
                target = firstGroup;
            } else if (dataset.getParametersValue(sampleName, parameter) != null
                    && dataset.getParametersValue(sampleName, parameter).equals(secondValue)) {
                target = secondGroup;
            } else {
                continue;
            }
            try {
                target.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
            } catch (Exception ignored) {
                // best effort: a peak that cannot be read is left out of the statistics
            }
        }
    } catch (Exception ignored) {
        // best effort: fall through and test whatever values were collected
    }

    TTestImpl ttest = new TTestImpl();
    double testResult = ttest.tTest((StatisticalSummary) firstGroup, (StatisticalSummary) secondGroup);
    return new double[] { testResult, firstGroup.getMean(), secondGroup.getMean() };
}

From source file:com.joliciel.talismane.other.corpus.CorpusStatistics.java

/**
 * Updates the corpus-wide counters and statistics with one parsed sentence.
 * <p>
 * Three passes are made over the parse:
 * <ol>
 * <li>over the pos-tagged tokens (skipping the root token), to update the word sets, the
 * unknown-word counters and the pos-tag frequencies;</li>
 * <li>over the dependency arcs, to update the label frequencies and the syntax depth and
 * distance statistics;</li>
 * <li>over all pairs of dependency arcs, to count crossing (non-projective) arcs.</li>
 * </ol>
 *
 * @param parseConfiguration the parse of the next sentence
 * @param writer not used by this method
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration, Writer writer) {
    sentenceCount++;
    sentenceLengthStats.addValue(parseConfiguration.getPosTagSequence().size());

    for (PosTaggedToken posTaggedToken : parseConfiguration.getPosTagSequence()) {
        if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            continue;
        }

        Token token = posTaggedToken.getToken();

        String word = token.getOriginalText();
        words.add(word);
        if (referenceWords != null && !referenceWords.contains(word)) {
            unknownTokenCount++;
        }
        if (alphanumeric.matcher(word).find()) {
            String lowercase = word.toLowerCase(TalismaneSession.getLocale());
            lowerCaseWords.add(lowercase);
            alphanumericCount++;
            if (referenceLowercaseWords != null && !referenceLowercaseWords.contains(lowercase)) {
                unknownAlphanumericCount++;
            }
        }

        tokenCount++;

        Integer countObj = posTagCounts.get(posTaggedToken.getTag().getCode());
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        posTagCounts.put(posTaggedToken.getTag().getCode(), count);
    }

    int maxDepth = 0;
    DescriptiveStatistics avgSyntaxDepthForSentenceStats = new DescriptiveStatistics();
    for (DependencyArc arc : parseConfiguration.getDependencies()) {
        Integer countObj = depLabelCounts.get(arc.getLabel());
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        depLabelCounts.put(arc.getLabel(), count);
        totalDepCount++;

        if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                && (arc.getLabel() == null || arc.getLabel().length() == 0)) {
            // do nothing for unattached stuff (e.g. punctuation)
        } else if ("ponct".equals(arc.getLabel())) {
            // do nothing for punctuation.
            // The constant-first comparison is deliberate: the label may be null (see the
            // check above), and a null-labelled arc whose head is not the root would
            // otherwise throw a NullPointerException here.
        } else {
            // depth = number of governing arcs between this arc's head and the root
            int depth = 0;
            DependencyArc theArc = arc;
            while (theArc != null && !theArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)) {
                theArc = parseConfiguration.getGoverningDependency(theArc.getHead());
                depth++;
            }
            if (depth > maxDepth) {
                maxDepth = depth;
            }

            syntaxDepthStats.addValue(depth);
            avgSyntaxDepthForSentenceStats.addValue(depth);

            // distance = number of token positions between head and dependent
            int distance = Math
                    .abs(arc.getHead().getToken().getIndex() - arc.getDependent().getToken().getIndex());
            syntaxDistanceStats.addValue(distance);
        }

        // NOTE(review): these two statistics are updated once per arc with the running
        // maximum / running mean, not once per sentence after the loop, which looks
        // unintended given their names - left unchanged because moving them would alter
        // every reported figure; confirm before changing.
        maxSyntaxDepthStats.addValue(maxDepth);
        if (avgSyntaxDepthForSentenceStats.getN() > 0) {
            avgSyntaxDepthStats.addValue(avgSyntaxDepthForSentenceStats.getMean());
        }
    }

    // we cheat a little bit by only allowing each arc to count once
    // there could be a situation where there are two independent non-projective arcs
    // crossing the same mother arc, but we prefer here to underestimate,
    // as this phenomenon is quite rare.
    Set<DependencyArc> nonProjectiveArcs = new HashSet<DependencyArc>();
    int i = 0;
    for (DependencyArc arc : parseConfiguration.getDependencies()) {
        i++;
        if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                && (arc.getLabel() == null || arc.getLabel().length() == 0)) {
            continue;
        }
        if (nonProjectiveArcs.contains(arc)) {
            continue;
        }

        int headIndex = arc.getHead().getToken().getIndex();
        int depIndex = arc.getDependent().getToken().getIndex();
        int startIndex = Math.min(headIndex, depIndex);
        int endIndex = Math.max(headIndex, depIndex);
        int j = 0;
        for (DependencyArc otherArc : parseConfiguration.getDependencies()) {
            j++;
            // only compare with arcs that come later in the iteration, so each pair is seen once
            if (j <= i) {
                continue;
            }
            if (otherArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                    && (otherArc.getLabel() == null || otherArc.getLabel().length() == 0)) {
                continue;
            }
            if (nonProjectiveArcs.contains(otherArc)) {
                continue;
            }

            int headIndex2 = otherArc.getHead().getToken().getIndex();
            int depIndex2 = otherArc.getDependent().getToken().getIndex();
            int startIndex2 = Math.min(headIndex2, depIndex2);
            int endIndex2 = Math.max(headIndex2, depIndex2);

            // two arcs cross when exactly one end of the other arc lies strictly inside this one
            boolean crossesFromLeft = startIndex2 < startIndex && endIndex2 > startIndex
                    && endIndex2 < endIndex;
            boolean crossesFromRight = startIndex2 > startIndex && startIndex2 < endIndex
                    && endIndex2 > endIndex;
            if (crossesFromLeft || crossesFromRight) {
                nonProjectiveArcs.add(arc);
                nonProjectiveArcs.add(otherArc);
                nonProjectiveCount++;
                LOG.debug("Non-projective arcs in sentence: " + parseConfiguration.getSentence().getText());
                LOG.debug(arc.toString());
                LOG.debug(otherArc.toString());
                break;
            }
        }
    }
}

From source file:com.mozilla.socorro.hadoop.RawDumpSize.java

/**
 * Runs the dump-size job and, if it succeeds, prints summary statistics of the byte
 * sizes found in its output files to standard output.
 * <p>
 * Each output line is split on tabs; the second field selects the category
 * ({@code raw} or {@code processed}, anything else is ignored) and the third field is
 * parsed as the size in bytes.
 *
 * @param args command line arguments; exactly one is expected
 * @return the result of {@code printUsage()} when the argument count is wrong,
 *         {@code 0} when the job succeeded, {@code -1} otherwise
 * @throws Exception if the job or reading its output fails
 */
public int run(String[] args) throws Exception {
    if (args.length != 1) {
        return printUsage();
    }

    Job job = initJob(args);
    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        return -1;
    }

    DescriptiveStatistics rawStats = new DescriptiveStatistics();
    DescriptiveStatistics processedStats = new DescriptiveStatistics();
    long rawTotal = 0L;
    long processedTotal = 0L;

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(job.getConfiguration());
        Pattern tabPattern = Pattern.compile("\t");
        for (FileStatus status : hdfs.listStatus(FileOutputFormat.getOutputPath(job))) {
            if (status.isDir()) {
                continue;
            }
            BufferedReader reader = null;
            try {
                reader = new BufferedReader(new InputStreamReader(hdfs.open(status.getPath())));
                for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                    String[] fields = tabPattern.split(line);
                    // the size is parsed before the category is looked at, for every line
                    int byteSize = Integer.parseInt(fields[2]);
                    String category = fields[1];
                    if ("raw".equals(category)) {
                        rawStats.addValue(byteSize);
                        rawTotal += byteSize;
                    } else if ("processed".equals(category)) {
                        processedStats.addValue(byteSize);
                        processedTotal += byteSize;
                    }
                }
            } finally {
                if (reader != null) {
                    reader.close();
                }
            }
        }
    } finally {
        if (hdfs != null) {
            hdfs.close();
        }
    }

    // report for the raw dumps
    System.out.println("===== " + job.getConfiguration().get(START_DATE) + " raw_data:dump =====");
    String rawRange = String.format("Min: %.02f Max: %.02f Mean: %.02f", rawStats.getMin(),
            rawStats.getMax(), rawStats.getMean());
    System.out.println(rawRange);
    String rawQuartiles = String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
            rawStats.getPercentile(25.0d), rawStats.getPercentile(50.0d), rawStats.getPercentile(75.0d));
    System.out.println(rawQuartiles);
    System.out.println("Total Bytes: " + rawTotal);

    // report for the processed dumps
    System.out.println("===== " + job.getConfiguration().get(START_DATE) + " processed_data:json =====");
    String processedRange = String.format("Min: %.02f Max: %.02f Mean: %.02f", processedStats.getMin(),
            processedStats.getMax(), processedStats.getMean());
    System.out.println(processedRange);
    String processedQuartiles = String.format(
            "1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
            processedStats.getPercentile(25.0d), processedStats.getPercentile(50.0d),
            processedStats.getPercentile(75.0d));
    System.out.println(processedQuartiles);
    System.out.println("Total Bytes: " + processedTotal);

    return 0;
}

From source file:edu.usc.goffish.gopher.sample.stats.N_Hop_Stats.java

/**
 * Computes the mean latency of edges at a given hop count, in up to three stages
 * distinguished by the super step and by the prefix of the incoming messages.
 * <ul>
 * <li>Very first call (iteration 0, super step 0): the first message carries the hop
 * count to analyse, and {@code init()} is run.</li>
 * <li>Super step 0: the latencies of all edges whose hop value equals {@code hopCount}
 * are averaged and sent to this sub-graph's partition as {@code "1:<mean>"}.</li>
 * <li>Later super steps (only for the caller that obtains the lock): incoming means are
 * averaged. If every message was a stage-1 message the result is broadcast to all
 * partitions as {@code "2:<mean>"}; otherwise the result is sent to the reduce step and
 * appended to {@code Hop_Stats.log}.</li>
 * </ul>
 * A mean of NaN (no values collected) is never sent in stages 1 and 2.
 *
 * @param messageList the messages received for this super step
 * @throws RuntimeException if {@code init()} fails with an {@link IOException}
 */
@Override
public void compute(List<SubGraphMessage> messageList) {

    if (getIteration() == 0 && getSuperStep() == 0) {
        String data = new String(messageList.get(0).getData());

        //   debugLog("GOT DATA initial :" + data);
        hopCount = Integer.parseInt(data);

        try {
            init();
        } catch (IOException e) {
            e.printStackTrace();
            // keep the cause so that the failure can be diagnosed by the caller
            throw new RuntimeException(e);
        }

    }
    ISubgraphInstance instance = getCurrentInstance();

    if (instance == null) {
        //    debugLog("Instance == null : " + getIteration());
        voteToHalt();
        haultApp();
        return;
    }

    if (getSuperStep() == 0) {

        DescriptiveStatistics statistics = new DescriptiveStatistics();

        long diskTimeStart = System.currentTimeMillis();
        if (!instance.hasProperties()) {
            //    debugLog("No Properties : " + getIteration());
            voteToHalt();
            return;
        }

        debugLog("INSTANCE_LOAD," + subgraph.getId() + "," + (System.currentTimeMillis() - diskTimeStart) + ","
                + getSuperStep() + "," + getIteration());

        for (ITemplateEdge edge : subgraph.edges()) {
            ISubgraphObjectProperties edgeProps = instance.getPropertiesForEdge(edge.getId());

            // both properties are comma-separated lists; entry i of one is read alongside
            // entry i of the other
            String latencyValue = (String) edgeProps.getValue(LATENCY_PROP);
            String hopValue = (String) edgeProps.getValue(HOP_PROP);
            String[] latencies = latencyValue == null ? null : latencyValue.split(",");
            String[] hops = hopValue == null ? null : hopValue.split(",");

            if (hops != null && latencies != null) {

                for (int i = 0; i < hops.length; i++) {
                    if (hopCount == Integer.parseInt(hops[i])) {
                        //              debugLog("HOP : " + h + ": Latency : " + latencies[i]);
                        statistics.addValue(Double.parseDouble(latencies[i]));
                    }
                }
            }

        }

        // the mean is NaN when no edge matched, in which case nothing is sent
        String data = "1:" + statistics.getMean();
        if (!"1:nan".equalsIgnoreCase(data)) {
            SubGraphMessage message = new SubGraphMessage(data.getBytes());
            sendMessage(partition.getId(), message);
            //debugLog("Sub-graph data sent : " + data);
        }
        voteToHalt();
        return;
    }

    if (!acquireLock("N_HOP_" + partition.getId() + " _" + getIteration() + "_" + getSuperStep())) {
        voteToHalt();
        return;
    }

    //debugLog("Lock Acqured");
    DescriptiveStatistics statistics = new DescriptiveStatistics();
    boolean finalStage = false;
    for (SubGraphMessage msg : messageList) {

        String data = new String(msg.getData());
        //debugLog("Partittion got data : " + data);
        String[] parts = data.split(":");
        // any message not prefixed with "1" marks this as the final stage
        if (!"1".equals(parts[0].trim())) {
            finalStage = true;
        }
        if (!parts[1].equalsIgnoreCase("nan")) {
            statistics.addValue(Double.parseDouble(parts[1]));
        }

    }

    if (finalStage) {
        try {

            String data = "" + statistics.getMean();
            try {
                Double.parseDouble(data);
                sendMessageToReduceStep(new SubGraphMessage(data.getBytes()));
            } catch (Exception ignored) {
                // best effort: a failure to reach the reduce step must not prevent
                // the result from being written to the log file below
            }

            PrintWriter writer = new PrintWriter(new FileWriter("Hop_Stats.log", true));
            try {
                log(writer, hopCount, statistics.getMean(), currentInstance.getTimestampStart());
            } finally {
                // release the file handle; close() is harmless if log() already closed it
                writer.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        voteToHalt();

    } else {

        String data = "2:" + statistics.getMean();

        if (!"2:nan".equalsIgnoreCase(data)) {
            SubGraphMessage message = new SubGraphMessage(data.getBytes());
            for (int i : partitions) {
                sendMessage(i, message);
            }
            //debugLog("Stage 2 data sent :" + data);
        }
        voteToHalt();
    }

}

From source file:com.joliciel.talismane.stats.FScoreCalculator.java

/**
 * Combine the results of n cross validation results into a single f-score file.
 * <p>
 * Every file in the directory whose name starts with <code>prefix</code> and ends with
 * <code>suffix</code> is read as a UTF-8 CSV file. The single character following the prefix
 * is parsed as the file's index. The first line of each file must contain a "true+" cell;
 * each following line is read as one outcome, with true+, false+, false-, precision, recall
 * and f-score taken from consecutive cells starting at the "true+" column, until a line whose
 * first cell is "AVERAGE" is reached.
 * <p>
 * For each outcome (in sorted order) one line is written holding the summed counts, the scores
 * produced by <code>FScoreStats.calculate()</code> on those sums (multiplied by 100), and the
 * mean and standard deviation of the per-file scores.
 * <p>
 * A JolicielException is raised when no matching file is found (including the case where the
 * directory cannot be listed) or when a file's first line has no "true+" cell. Any IOException
 * is rethrown wrapped in a RuntimeException.
 *
 * @param directory the directory containing the files to combine
 * @param prefix the start of the name of each file to combine
 * @param suffix the end of the name of each file to combine
 * @param csvFileWriter the writer receiving the combined results; it is flushed but not closed
 */
static void combineCrossValidationResults(File directory, String prefix, String suffix, Writer csvFileWriter) {
    try {
        File[] files = directory.listFiles();
        if (files == null) {
            // listFiles() yields null when the path is not a directory or cannot be listed;
            // handle it like an empty directory so that the explicit "no files" error below
            // is raised instead of a NullPointerException
            files = new File[0];
        }
        Map<Integer, Map<String, FScoreStats>> fileStatsMap = new HashMap<Integer, Map<String, FScoreStats>>();
        for (File file : files) {
            if (file.getName().startsWith(prefix) && file.getName().endsWith(suffix)) {
                // the index is the single character directly after the prefix
                int index = Integer.parseInt(file.getName().substring(prefix.length(), prefix.length() + 1));
                Map<String, FScoreStats> statsMap = new HashMap<String, FScoreCalculator.FScoreStats>();
                fileStatsMap.put(index, statsMap);
                Scanner scanner = new Scanner(
                        new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")));
                try {
                    boolean firstLine = true;
                    int truePositivePos = -1;

                    while (scanner.hasNextLine()) {
                        String line = scanner.nextLine();
                        List<String> cells = CSV.getCSVCells(line);
                        if (firstLine) {
                            // header line: locate the column from which the figures are read
                            int i = 0;
                            for (String cell : cells) {
                                if (cell.equals("true+")) {
                                    truePositivePos = i;
                                    break;
                                }
                                i++;
                            }
                            if (truePositivePos < 0) {
                                throw new JolicielException("Couldn't find true+ on first line");
                            }
                            firstLine = false;
                        } else {
                            FScoreStats stats = new FScoreStats();
                            String outcome = cells.get(0);
                            stats.outcome = outcome;
                            // the AVERAGE line ends the per-outcome section of the file
                            if (outcome.equals("AVERAGE"))
                                break;
                            stats.truePos = Integer.parseInt(cells.get(truePositivePos));
                            stats.falsePos = Integer.parseInt(cells.get(truePositivePos + 1));
                            stats.falseNeg = Integer.parseInt(cells.get(truePositivePos + 2));
                            stats.precision = Double.parseDouble(cells.get(truePositivePos + 3));
                            stats.recall = Double.parseDouble(cells.get(truePositivePos + 4));
                            stats.fScore = Double.parseDouble(cells.get(truePositivePos + 5));
                            statsMap.put(outcome, stats);
                        } // firstLine?
                    } // has more lines
                } finally {
                    // release the file handle even when a line fails to parse
                    scanner.close();
                }
            } // file in current series
        } // next file

        int numFiles = fileStatsMap.size();
        if (numFiles == 0) {
            throw new JolicielException("No files found matching prefix and suffix provided");
        }

        // per-outcome statistics over the files, keyed by outcome + "fScore" / "precision" / "recall"
        Map<String, DescriptiveStatistics> descriptiveStatsMap = new HashMap<String, DescriptiveStatistics>();
        // per-outcome sums of the raw counts over all files
        Map<String, FScoreStats> outcomeStats = new HashMap<String, FScoreCalculator.FScoreStats>();
        // sorted, so that the output lines come out in a stable order
        Set<String> outcomes = new TreeSet<String>();
        for (Map<String, FScoreStats> statsMap : fileStatsMap.values()) {
            for (FScoreStats stats : statsMap.values()) {
                DescriptiveStatistics fScoreStats = descriptiveStatsMap.get(stats.outcome + "fScore");
                if (fScoreStats == null) {
                    fScoreStats = new DescriptiveStatistics();
                    descriptiveStatsMap.put(stats.outcome + "fScore", fScoreStats);
                }
                fScoreStats.addValue(stats.fScore);
                DescriptiveStatistics precisionStats = descriptiveStatsMap.get(stats.outcome + "precision");
                if (precisionStats == null) {
                    precisionStats = new DescriptiveStatistics();
                    descriptiveStatsMap.put(stats.outcome + "precision", precisionStats);
                }
                precisionStats.addValue(stats.precision);
                DescriptiveStatistics recallStats = descriptiveStatsMap.get(stats.outcome + "recall");
                if (recallStats == null) {
                    recallStats = new DescriptiveStatistics();
                    descriptiveStatsMap.put(stats.outcome + "recall", recallStats);
                }
                recallStats.addValue(stats.recall);

                FScoreStats outcomeStat = outcomeStats.get(stats.outcome);
                if (outcomeStat == null) {
                    outcomeStat = new FScoreStats();
                    outcomeStat.outcome = stats.outcome;
                    outcomeStats.put(stats.outcome, outcomeStat);
                }
                outcomeStat.truePos += stats.truePos;
                outcomeStat.falsePos += stats.falsePos;
                outcomeStat.falseNeg += stats.falseNeg;

                outcomes.add(stats.outcome);
            }
        }

        // title line followed by the column headers
        csvFileWriter.write(CSV.format(prefix + suffix));
        csvFileWriter.write("\n");
        csvFileWriter.write(CSV.format("outcome"));
        csvFileWriter.write(CSV.format("true+") + CSV.format("false+") + CSV.format("false-")
                + CSV.format("tot precision") + CSV.format("avg precision") + CSV.format("dev precision")
                + CSV.format("tot recall") + CSV.format("avg recall") + CSV.format("dev recall")
                + CSV.format("tot f-score") + CSV.format("avg f-score") + CSV.format("dev f-score") + "\n");

        for (String outcome : outcomes) {
            csvFileWriter.write(CSV.format(outcome));
            FScoreStats outcomeStat = outcomeStats.get(outcome);
            DescriptiveStatistics fScoreStats = descriptiveStatsMap.get(outcome + "fScore");
            DescriptiveStatistics precisionStats = descriptiveStatsMap.get(outcome + "precision");
            DescriptiveStatistics recallStats = descriptiveStatsMap.get(outcome + "recall");
            // fills in the "tot" scores from the summed counts
            outcomeStat.calculate();
            csvFileWriter.write(CSV.format(outcomeStat.truePos));
            csvFileWriter.write(CSV.format(outcomeStat.falsePos));
            csvFileWriter.write(CSV.format(outcomeStat.falseNeg));
            csvFileWriter.write(CSV.format(outcomeStat.precision * 100));
            csvFileWriter.write(CSV.format(precisionStats.getMean()));
            csvFileWriter.write(CSV.format(precisionStats.getStandardDeviation()));
            csvFileWriter.write(CSV.format(outcomeStat.recall * 100));
            csvFileWriter.write(CSV.format(recallStats.getMean()));
            csvFileWriter.write(CSV.format(recallStats.getStandardDeviation()));
            csvFileWriter.write(CSV.format(outcomeStat.fScore * 100));
            csvFileWriter.write(CSV.format(fScoreStats.getMean()));
            csvFileWriter.write(CSV.format(fScoreStats.getStandardDeviation()));
            csvFileWriter.write("\n");
            csvFileWriter.flush();
        }
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}

From source file:com.joliciel.jochre.lexicon.LexiconErrorWriter.java

/**
 * Merge the per-fold CSV files of a cross validation run found in an evaluation directory.
 * <p>
 * Every <code>.csv</code> file in the directory is expected to be named
 * <code>prefix + index + separator + suffix + "_" + fileType + ".csv"</code>, where the index is
 * a single digit. For each file type one merged file named
 * <code>prefix + "A_" + suffix + "_" + fileType + ".csv"</code> is written to the same
 * directory:
 * <ul>
 * <li>for the "KEMatrix" type, the counts of all files are summed per group and written via
 * <code>writeStats</code>, followed by the mean ("%Avg") and standard deviation ("%Dev") of the
 * per-file values found on the rows whose name ends with "%";</li>
 * <li>for any other type, the files are concatenated, keeping the first line (header) of the
 * first file only.</li>
 * </ul>
 * All files opened here are closed before returning, whether or not an error occurred.
 * Any IOException is logged and rethrown wrapped in a RuntimeException.
 *
 * @param evalDir the directory containing the files to merge, which also receives the output
 * @param prefix the start of the name of each file, immediately followed by the fold index
 */
static void mergeCrossValidation(File evalDir, String prefix) {
    // declared outside the try block so that every writer can be closed in the finally clause
    Map<String, Writer> writers = new HashMap<String, Writer>();
    try {
        File[] files = evalDir.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.endsWith(".csv");
            }
        });
        List<String> groupNames = new ArrayList<String>();
        Map<String, ErrorStatistics> errorMap = new LinkedHashMap<String, ErrorStatistics>();
        Map<String, Map<String, DescriptiveStatistics>> statMap = new HashMap<String, Map<String, DescriptiveStatistics>>();
        for (File file : files) {
            String filename = file.getName();
            LOG.debug("Processing " + filename);
            int index = Integer.parseInt(filename.substring(prefix.length(), prefix.length() + 1));
            String suffix = filename.substring(prefix.length() + 2, filename.lastIndexOf('_'));
            String fileType = filename.substring(filename.lastIndexOf('_') + 1, filename.lastIndexOf('.'));
            LOG.debug("index: " + index);
            LOG.debug("suffix: " + suffix);
            LOG.debug("fileType: " + fileType);

            // one output writer per file type, created when the first file of that type is met
            Writer writer = writers.get(fileType);
            boolean firstFile = false;
            if (writer == null) {
                writer = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(
                                new File(evalDir, prefix + "A_" + suffix + "_" + fileType + ".csv"), false),
                        "UTF8"));
                writers.put(fileType, writer);
                firstFile = true;
            }

            Scanner scanner = new Scanner(file);
            try {
                if (fileType.equals("KEMatrix")) {
                    int i = 0;
                    List<String> myGroupNames = new ArrayList<String>();
                    // per group: whether the "total" row of this file had a non-zero total
                    Map<String, Boolean> haveCountMap = new HashMap<String, Boolean>();
                    while (scanner.hasNextLine()) {
                        String line = scanner.nextLine();
                        List<String> cells = CSV.getCSVCells(line);
                        if (i == 0) {
                            // first line: a group name in every fifth cell
                            for (int j = 0; j < cells.size(); j += 5) {
                                String groupName = cells.get(j);
                                if (!errorMap.containsKey(groupName)) {
                                    errorMap.put(groupName, new ErrorStatistics());
                                    statMap.put(groupName, new HashMap<String, DescriptiveStatistics>());
                                    groupNames.add(groupName);
                                }
                                myGroupNames.add(groupName);
                            }
                        } else if (i == 1) {
                            // do nothing
                        } else {
                            String rowName = cells.get(0);
                            int j = 0;
                            for (String groupName : myGroupNames) {
                                ErrorStatistics errorStats = errorMap.get(groupName);
                                Map<String, DescriptiveStatistics> stats = statMap.get(groupName);
                                // each group occupies five cells: the figures sit at offsets 1 to 3
                                double correctCount = Double.parseDouble(cells.get(j * 5 + 1));
                                double errorCount = Double.parseDouble(cells.get(j * 5 + 2));
                                double totalCount = Double.parseDouble(cells.get(j * 5 + 3));
                                Boolean haveCount = haveCountMap.get(groupName);

                                if (rowName.equals("known")) {
                                    errorStats.knownWordCorrectCount += correctCount;
                                    errorStats.knownWordErrorCount += errorCount;
                                } else if (rowName.equals("unknown")) {
                                    errorStats.unknownWordCorrectCount += correctCount;
                                    errorStats.unknownWordErrorCount += errorCount;
                                } else if (rowName.equals("goodSeg")) {
                                    errorStats.goodSegCorrectCount += correctCount;
                                    errorStats.goodSegErrorCount += errorCount;
                                } else if (rowName.equals("badSeg")) {
                                    errorStats.badSegCorrectCount += correctCount;
                                    errorStats.badSegErrorCount += errorCount;
                                } else if (rowName.equals("knownLetters")) {
                                    errorStats.knownWordCorrectLetterCount += correctCount;
                                    errorStats.knownWordErrorLetterCount += errorCount;
                                } else if (rowName.equals("unknownLetters")) {
                                    errorStats.unknownWordCorrectLetterCount += correctCount;
                                    errorStats.unknownWordErrorLetterCount += errorCount;
                                } else if (rowName.equals("goodSegLetters")) {
                                    errorStats.goodSegCorrectLetterCount += correctCount;
                                    errorStats.goodSegErrorLetterCount += errorCount;
                                } else if (rowName.equals("badSegLetters")) {
                                    errorStats.badSegCorrectLetterCount += correctCount;
                                    errorStats.badSegErrorLetterCount += errorCount;
                                } else if (rowName.equals("inBeam")) {
                                    errorStats.answerInBeamCorrectCount += correctCount;
                                    errorStats.answerInBeamErrorCount += errorCount;
                                } else if (rowName.equals("total")) {
                                    haveCountMap.put(groupName, totalCount > 0);
                                } else if (rowName.endsWith("%")) {
                                    // haveCount is null when no "total" row has been read yet for
                                    // this group; treat that as "no count" rather than failing
                                    // with a NullPointerException on unboxing
                                    if (Boolean.TRUE.equals(haveCount)) {
                                        String keyPrefix = rowName.substring(0, rowName.length() - 1);
                                        String key = keyPrefix + "|correct";
                                        DescriptiveStatistics correctStat = stats.get(key);
                                        if (correctStat == null) {
                                            correctStat = new DescriptiveStatistics();
                                            stats.put(key, correctStat);
                                        }
                                        correctStat.addValue(correctCount);
                                        key = keyPrefix + "|error";
                                        DescriptiveStatistics errorStat = stats.get(key);
                                        if (errorStat == null) {
                                            errorStat = new DescriptiveStatistics();
                                            stats.put(key, errorStat);
                                        }
                                        errorStat.addValue(errorCount);
                                        key = keyPrefix + "|total";
                                        DescriptiveStatistics totalStat = stats.get(key);
                                        if (totalStat == null) {
                                            totalStat = new DescriptiveStatistics();
                                            stats.put(key, totalStat);
                                        }
                                        totalStat.addValue(totalCount);
                                    }
                                }

                                j++;
                            }
                        }
                        i++;
                    }
                } else {
                    // plain concatenation: the header line is copied from the first file only
                    boolean firstLine = true;
                    while (scanner.hasNextLine()) {
                        String line = scanner.nextLine();
                        if (firstLine) {
                            if (firstFile)
                                writer.write(line + "\n");
                            firstLine = false;
                        } else {
                            writer.write(line + "\n");
                        }
                        writer.flush();
                    }
                } // file type
            } finally {
                // release the input file whether or not it could be processed
                scanner.close();
            }
        } // next file

        // NOTE(review): assumes at least one KEMatrix file was found, and that every group has
        // a statistic for every stat type below - otherwise a NullPointerException is thrown
        Writer statsWriter = writers.get("KEMatrix");
        writeStats(statsWriter, errorMap);
        statsWriter.write("\n");
        String[] statTypes = new String[] { "known", "unknown", "goodSeg", "badSeg", "inBeam", "total",
                "knownLetter", "unknownLetter", "goodSegLetter", "badSegLetter", "totalLetter" };
        for (String statType : statTypes) {
            // one line of means, with the groups side by side
            for (String groupName : groupNames) {
                Map<String, DescriptiveStatistics> statsMap = statMap.get(groupName);
                DescriptiveStatistics correctStat = statsMap.get(statType + "|correct");
                DescriptiveStatistics errorStat = statsMap.get(statType + "|error");
                DescriptiveStatistics totalStat = statsMap.get(statType + "|total");

                statsWriter.write(CSV.format(statType + "%Avg") + CSV.format(correctStat.getMean())
                        + CSV.format(errorStat.getMean()) + CSV.format(totalStat.getMean())
                        + CSV.getCsvSeparator());

            } // next group
            statsWriter.write("\n");
            // followed by one line of standard deviations
            for (String groupName : groupNames) {
                Map<String, DescriptiveStatistics> statsMap = statMap.get(groupName);
                DescriptiveStatistics correctStat = statsMap.get(statType + "|correct");
                DescriptiveStatistics errorStat = statsMap.get(statType + "|error");
                DescriptiveStatistics totalStat = statsMap.get(statType + "|total");

                statsWriter.write(CSV.format(statType + "%Dev") + CSV.format(correctStat.getStandardDeviation())
                        + CSV.format(errorStat.getStandardDeviation())
                        + CSV.format(totalStat.getStandardDeviation()) + CSV.getCsvSeparator());

            } // next group
            statsWriter.write("\n");
            statsWriter.flush();
        }
        // closed explicitly so that a failure to write the statistics is reported to the caller
        statsWriter.close();

    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    } finally {
        // close every output file, including the non-KEMatrix ones and those left open by an
        // error; closing a writer that is already closed has no effect
        for (Writer openWriter : writers.values()) {
            try {
                openWriter.close();
            } catch (IOException closeError) {
                LogUtils.logError(LOG, closeError);
            }
        }
    }
}

From source file:com.joliciel.jochre.graphics.SegmenterImpl.java

/**
 * Clear out anything found in the right & left margins
 * @param sourceImage/*from w ww. jav a  2s . co  m*/
 */
void cleanMargins(SourceImage sourceImage) {
    LOG.debug("########## cleanMargins #########");

    int minCardinalityForMargin = 8;
    double averageShapeWidth = sourceImage.getAverageShapeWidth();

    LOG.debug("Finding right margin");
    double rightLimit = (double) sourceImage.getWidth() * 0.67;

    // first, create a DBScan cluster of all rows near the right-hand side
    List<RowOfShapes> rightHandRows = new ArrayList<RowOfShapes>();
    List<double[]> rightCoordinates = new ArrayList<double[]>();

    for (RowOfShapes row : sourceImage.getRows()) {
        double right = row.getRight();
        if (right >= rightLimit) {
            LOG.trace(row.toString());
            LOG.trace(
                    "Right: " + right + " + " + row.getXAdjustment() + " = " + (right - row.getXAdjustment()));
            right -= row.getXAdjustment();
            rightHandRows.add(row);
            rightCoordinates.add(new double[] { right });
        }
    }

    DBSCANClusterer<RowOfShapes> rightMarginClusterer = new DBSCANClusterer<RowOfShapes>(rightHandRows,
            rightCoordinates);
    Set<Set<RowOfShapes>> rowClusters = rightMarginClusterer.cluster(averageShapeWidth, minCardinalityForMargin,
            true);

    TreeSet<Set<RowOfShapes>> orderedRowClusters = new TreeSet<Set<RowOfShapes>>(
            new CardinalityComparator<RowOfShapes>());
    orderedRowClusters.addAll(rowClusters);

    int i = 0;

    // find the right-most cluster with sufficient cardinality, and assume it's the right margin
    DescriptiveStatistics rightMarginStats = null;
    for (Set<RowOfShapes> cluster : orderedRowClusters) {
        DescriptiveStatistics rightStats = new DescriptiveStatistics();
        for (RowOfShapes row : cluster)
            rightStats.addValue(row.getRight() - row.getXAdjustment());

        LOG.debug("Cluster " + i + ". Cardinality=" + cluster.size());
        LOG.debug("Right mean : " + rightStats.getMean());
        LOG.debug("Right std dev: " + rightStats.getStandardDeviation());

        if (cluster.size() >= minCardinalityForMargin
                && (rightMarginStats == null || rightMarginStats.getMean() < rightStats.getMean())) {
            rightMarginStats = rightStats;
        }
        i++;
    }

    // see how many rows would violate this margin - if too many, assume no margin
    // these rows are only rows which extend across the margin
    if (rightMarginStats != null) {
        LOG.debug("Right margin mean : " + rightMarginStats.getMean());
        LOG.debug("Right margin std dev: " + rightMarginStats.getStandardDeviation());

        double rightMarginLimit = rightMarginStats.getMean() + sourceImage.getAverageShapeWidth();
        LOG.debug("rightMarginLimit: " + rightMarginLimit);
        int numRowsToChop = 0;
        for (RowOfShapes row : sourceImage.getRows()) {
            if (row.getRight() >= rightLimit) {
                if (row.getRight() - row.getXAdjustment() >= rightMarginLimit
                        && row.getLeft() - row.getXAdjustment() <= rightMarginLimit) {
                    LOG.debug("Found overlapping row : " + row);
                    LOG.debug("Adjusted right : " + (row.getRight() - row.getXAdjustment()));
                    numRowsToChop++;
                }
            }
        }
        if (numRowsToChop >= 3) {
            LOG.debug("Too many overlapping rows - ignoring margin");
            rightMarginStats = null;
        }
    }

    if (rightMarginStats != null) {
        double rightMarginLimit = rightMarginStats.getMean() + sourceImage.getAverageShapeWidth();
        List<RowOfShapes> rowsToRemove = new ArrayList<RowOfShapes>();
        for (RowOfShapes row : sourceImage.getRows()) {
            double right = row.getRight() - row.getXAdjustment();
            LOG.trace(row.toString());
            LOG.trace("Adjusted right: " + right);

            if (right >= rightMarginLimit) {
                LOG.trace("Has out-of-margin stuff!");
                // need to chop off groups to the right of this threshold
                List<GroupOfShapes> groupsToChop = new ArrayList<GroupOfShapes>();
                for (GroupOfShapes group : row.getGroups()) {
                    if (group.getLeft() - row.getXAdjustment() > rightMarginLimit) {
                        groupsToChop.add(group);
                        LOG.debug("Chopping group outside of right margin: " + group);
                    }
                }
                for (GroupOfShapes group : groupsToChop) {
                    row.getShapes().removeAll(group.getShapes());
                }
                row.getGroups().removeAll(groupsToChop);

                if (row.getGroups().size() == 0) {
                    LOG.debug("Removing empty " + row);
                    rowsToRemove.add(row);
                } else {
                    row.recalculate();
                    row.assignGuideLines();
                }
            } // does this row extend beyond the margin?
        } // next row
        sourceImage.getRows().removeAll(rowsToRemove);
    } // have a right margin

    LOG.debug("Finding left margin");
    double leftLimit = (double) sourceImage.getWidth() * 0.33;

    // first, create a DBScan cluster of all rows near the left-hand side
    List<RowOfShapes> leftHandRows = new ArrayList<RowOfShapes>();
    List<double[]> leftCoordinates = new ArrayList<double[]>();

    for (RowOfShapes row : sourceImage.getRows()) {
        double left = row.getLeft();
        if (left <= leftLimit) {
            LOG.trace(row.toString());
            LOG.trace("Left: " + left + " - " + row.getXAdjustment() + " = " + (left - row.getXAdjustment()));
            left -= row.getXAdjustment();
            leftHandRows.add(row);
            leftCoordinates.add(new double[] { left });
        }
    }

    DBSCANClusterer<RowOfShapes> leftMarginClusterer = new DBSCANClusterer<RowOfShapes>(leftHandRows,
            leftCoordinates);
    Set<Set<RowOfShapes>> rowClustersLeft = leftMarginClusterer.cluster(averageShapeWidth,
            minCardinalityForMargin, true);

    TreeSet<Set<RowOfShapes>> orderedRowClustersLeft = new TreeSet<Set<RowOfShapes>>(
            new CardinalityComparator<RowOfShapes>());
    orderedRowClustersLeft.addAll(rowClustersLeft);

    i = 0;

    // find the left-most cluster with sufficient cardinality, and assume it's the left margin
    DescriptiveStatistics leftMarginStats = null;
    for (Set<RowOfShapes> cluster : orderedRowClustersLeft) {
        DescriptiveStatistics leftStats = new DescriptiveStatistics();
        for (RowOfShapes row : cluster)
            leftStats.addValue(row.getLeft() - row.getXAdjustment());

        LOG.debug("Cluster " + i + ". Cardinality=" + cluster.size());
        LOG.debug("Left mean : " + leftStats.getMean());
        LOG.debug("Left std dev: " + leftStats.getStandardDeviation());

        if (cluster.size() >= minCardinalityForMargin
                && (leftMarginStats == null || leftMarginStats.getMean() > leftStats.getMean())) {
            leftMarginStats = leftStats;
        }
        i++;
    }

    // see how many rows would violate this margin - if too many, assume no margin
    // these rows are only rows which extend across the margin
    if (leftMarginStats != null) {
        LOG.debug("Left margin mean : " + leftMarginStats.getMean());
        LOG.debug("Left margin std dev: " + leftMarginStats.getStandardDeviation());

        double leftMarginLimit = leftMarginStats.getMean() - sourceImage.getAverageShapeWidth();
        LOG.debug("leftMarginLimit: " + leftMarginLimit);
        int numRowsToChop = 0;
        for (RowOfShapes row : sourceImage.getRows()) {
            if (row.getLeft() <= leftLimit) {
                if (row.getLeft() - row.getXAdjustment() <= leftMarginLimit
                        && row.getRight() - row.getXAdjustment() >= leftMarginLimit) {
                    LOG.debug("Found overlapping row : " + row);
                    LOG.debug("Adjusted left : " + (row.getLeft() - row.getXAdjustment()));
                    numRowsToChop++;
                }
            }
        }
        if (numRowsToChop >= 3) {
            LOG.debug("Too many overlapping rows - ignoring margin");
            leftMarginStats = null;
        }
    }

    if (leftMarginStats != null) {
        double leftMarginLimit = leftMarginStats.getMean() - sourceImage.getAverageShapeWidth();
        List<RowOfShapes> rowsToRemove = new ArrayList<RowOfShapes>();
        for (RowOfShapes row : sourceImage.getRows()) {
            double left = row.getLeft() - row.getXAdjustment();
            LOG.trace(row.toString());
            LOG.trace("Adjusted left: " + left);

            if (left <= leftMarginLimit) {
                LOG.trace("Has out-of-margin stuff!");
                // need to chop off groups to the left of this threshold
                List<GroupOfShapes> groupsToChop = new ArrayList<GroupOfShapes>();
                for (GroupOfShapes group : row.getGroups()) {
                    if (group.getRight() - row.getXAdjustment() < leftMarginLimit) {
                        groupsToChop.add(group);
                        LOG.debug("Chopping group outside of left margin: " + group);
                    }
                }
                for (GroupOfShapes group : groupsToChop) {
                    row.getShapes().removeAll(group.getShapes());
                }
                row.getGroups().removeAll(groupsToChop);

                if (row.getGroups().size() == 0) {
                    LOG.debug("Removing empty " + row);
                    rowsToRemove.add(row);
                } else {
                    row.recalculate();
                    row.assignGuideLines();
                }
            } // does this row extend beyond the margin?
        } // next row
        sourceImage.getRows().removeAll(rowsToRemove);
    } // have a left margin
}

From source file:guineu.modules.dataanalysis.wilcoxontest.WilcoxonTestTask.java

/**
 * Runs R's <code>wilcox.test</code> on the peak values of one dataset row, split into two
 * groups of samples, and reports the p-value together with the mean of each group.
 * <p>
 * When the <code>parameter</code> field is null, the groups are the entries of the
 * <code>group1</code> and <code>group2</code> fields. Otherwise they are the samples whose value
 * for that parameter equals the first, respectively the second, available parameter value.
 * <p>
 * If R cannot be loaded or the evaluation fails, the error is logged, the task status is set
 * to ERROR and the p-value slot keeps its initial value of 0; the two means are still filled in.
 *
 * @param mol the index of the dataset row to test
 * @return an array of three values: the p-value, the mean of the first group and the mean of
 *         the second group; or null when the parameter has fewer than two available values
 * @throws IllegalArgumentException declared by the signature; not thrown directly by this method
 */
public double[] Ttest(int mol) throws IllegalArgumentException {
    final Logger logger = Logger.getLogger(WilcoxonTestTask.class.getName());
    DescriptiveStatistics stats1 = new DescriptiveStatistics();
    DescriptiveStatistics stats2 = new DescriptiveStatistics();
    double[] values = new double[3];

    if (parameter == null) {
        // explicit groups: a value that cannot be read is reported and left out
        for (int i = 0; i < group1.length; i++) {
            try {
                stats1.addValue((Double) this.dataset.getRow(mol).getPeak(group1[i]));
            } catch (Exception e) {
                logger.log(Level.WARNING, "Could not read the peak of row " + mol + " for group 1 entry " + i, e);
            }
        }
        for (int i = 0; i < group2.length; i++) {
            try {
                stats2.addValue((Double) this.dataset.getRow(mol).getPeak(group2[i]));
            } catch (Exception e) {
                logger.log(Level.WARNING, "Could not read the peak of row " + mol + " for group 2 entry " + i, e);
            }
        }
    } else {
        try {
            // Determine groups for selected raw data files
            List<String> availableParameterValues = dataset.getParameterAvailableValues(parameter);

            if (availableParameterValues.size() <= 1) {
                // a two-sample test needs two groups
                return null;
            }
            String parameter1 = availableParameterValues.get(0);
            String parameter2 = availableParameterValues.get(1);

            for (String sampleName : dataset.getAllColumnNames()) {
                Object sampleValue = dataset.getParametersValue(sampleName, parameter);
                if (sampleValue == null) {
                    continue;
                }
                if (sampleValue.equals(parameter1)) {
                    try {
                        stats1.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
                    } catch (Exception ignored) {
                        // NOTE(review): samples whose peak cannot be read are skipped silently,
                        // as before - presumably deliberate; confirm
                    }
                } else if (sampleValue.equals(parameter2)) {
                    try {
                        stats2.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
                    } catch (Exception ignored) {
                        // NOTE(review): skipped silently, as for the first group
                    }
                }
            }
        } catch (Exception e) {
            logger.log(Level.WARNING, "Could not determine the sample groups for row " + mol, e);
        }
    }
    try {
        final Rengine rEngine;
        try {
            rEngine = RUtilities.getREngine();
        } catch (Throwable t) {
            // keep the original failure as the cause so that its stack trace is not lost
            throw new IllegalStateException(
                    "Wilcoxon test requires R but it couldn't be loaded (" + t.getMessage() + ')', t);
        }
        synchronized (RUtilities.R_SEMAPHORE) {
            rEngine.eval("x <- 0");
            rEngine.eval("y <- 0");
            // named so as not to shadow the group1 / group2 fields
            long xReference = rEngine.rniPutDoubleArray(stats1.getValues());
            rEngine.rniAssign("x", xReference, 0);

            long yReference = rEngine.rniPutDoubleArray(stats2.getValues());
            rEngine.rniAssign("y", yReference, 0);
            /* if(mol == 1){
             rEngine.eval("write.csv(x, \"x.csv\")");
             rEngine.eval("write.csv(y, \"y.csv\")");
             }*/
            rEngine.eval("result <- 0");

            rEngine.eval("result <- wilcox.test(as.numeric(t(x)),as.numeric(t(y)))");
            long parsedExpression = rEngine.rniParse("result$p.value", 1);
            long evaluatedReference = rEngine.rniEval(parsedExpression, 0);
            REXP pValue = new REXP(rEngine, evaluatedReference);

            values[0] = pValue.asDouble();
        }

        // NOTE(review): end() is called on the engine obtained from RUtilities after every
        // test - confirm that this is intended for an engine that looks shared
        rEngine.end();
        setStatus(TaskStatus.FINISHED);
    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        setStatus(TaskStatus.ERROR);
    }

    values[1] = stats1.getMean();
    values[2] = stats2.getMean();
    return values;
}