Example usage of org.apache.commons.math.stat.descriptive.DescriptiveStatistics.getMean

Introduction

This page shows example usage of the getMean method of org.apache.commons.math.stat.descriptive.DescriptiveStatistics.

Prototype

public double getMean()

Source Link

Document

Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">arithmetic mean</a> of the available values.

Usage

From source file:com.joliciel.jochre.graphics.SourceImageImpl.java

/**
 * Finds the column separators of this image and returns them as a list of
 * {@code Rectangle}s (each one a {@code WhiteArea}).
 * <p>
 * The list is computed on the first call and stored in {@code columnSeparators};
 * later calls return the stored list without recomputing it.
 * <p>
 * Outline of the computation:
 * <ol>
 * <li>Build a per-row fill profile ({@code horizontalCounts}) from the slope-adjusted
 * shapes and use it to find runs of (nearly) empty rows.</li>
 * <li>Use the large empty runs near the top and bottom to pick {@code mainTextTop}
 * and {@code mainTextBottom}.</li>
 * <li>Build a per-column fill profile ({@code verticalCounts}) from the shapes whose
 * slope-adjusted top lies between those two limits, and find runs of (nearly) empty
 * columns at least one mean x-height wide.</li>
 * <li>Narrow each such run to its widest sub-run at the lowest threshold that yields
 * one, and turn it into a {@code WhiteArea}.</li>
 * </ol>
 *
 * @return the column separators found (an empty list if there are none)
 */
public List<Rectangle> findColumnSeparators() {
    if (columnSeparators == null) {
        LOG.debug("############ findColumnSeparators ##############");
        double slope = this.getMeanHorizontalSlope();

        double imageMidPointX = (double) this.getWidth() / 2.0;

        // horizontalCounts[y] = summed width of the shapes covering (slope-adjusted) row y
        int[] horizontalCounts = new int[this.getHeight()];
        DescriptiveStatistics rowXHeightStats = new DescriptiveStatistics();
        // first get the fill factor for each horizontal row in the image
        for (RowOfShapes row : this.getRows()) {
            rowXHeightStats.addValue(row.getXHeight());
            for (Shape shape : row.getShapes()) {
                double shapeMidPointX = (double) (shape.getLeft() + shape.getRight()) / 2.0;
                // shift the shape's top according to the mean slope and its horizontal
                // distance from the image mid-point
                int slopeAdjustedTop = (int) Math
                        .round(shape.getTop() + (slope * (shapeMidPointX - imageMidPointX)));
                if (slopeAdjustedTop >= 0 && slopeAdjustedTop < this.getHeight()) {
                    for (int i = 0; i < shape.getHeight(); i++) {
                        // rows that would fall below the image are ignored
                        if (slopeAdjustedTop + i < horizontalCounts.length)
                            horizontalCounts[slopeAdjustedTop + i] += shape.getWidth();
                    }
                }
            }
        }
        // statistics over all rows, and over the rows containing at least one shape pixel
        DescriptiveStatistics horizontalStats = new DescriptiveStatistics();
        DescriptiveStatistics horizontalStatsNonEmpty = new DescriptiveStatistics();
        for (int i = 0; i < this.getHeight(); i++) {
            //         LOG.trace("Row " + i + ": " + horizontalCounts[i]);
            horizontalStats.addValue(horizontalCounts[i]);
            if (horizontalCounts[i] > 0)
                horizontalStatsNonEmpty.addValue(horizontalCounts[i]);
        }
        LOG.debug("Mean horizontal count: " + horizontalStats.getMean());
        LOG.debug("Median horizontal count: " + horizontalStats.getPercentile(50));
        LOG.debug("25 percentile horizontal count: " + horizontalStats.getPercentile(25));
        LOG.debug("Mean horizontal count (non empty): " + horizontalStatsNonEmpty.getMean());
        LOG.debug("Median horizontal count (non empty): " + horizontalStatsNonEmpty.getPercentile(50));
        LOG.debug("25 percentile horizontal count (non empty): " + horizontalStatsNonEmpty.getPercentile(25));
        LOG.debug("10 percentile horizontal count (non empty): " + horizontalStatsNonEmpty.getPercentile(10));

        // a row counts as "empty" when its fill is at most 1/8 of the mean non-empty fill
        double maxEmptyRowCount = horizontalStatsNonEmpty.getMean() / 8.0;
        LOG.debug("maxEmptyRowCount: " + maxEmptyRowCount);

        // collect the runs of consecutive empty rows as {start, end} pairs, where end is
        // the index of the first non-empty row following the run
        boolean inEmptyHorizontalRange = false;
        List<int[]> emptyHorizontalRanges = new ArrayList<int[]>();
        int emptyHorizontalRangeStart = 0;
        for (int i = 0; i < this.getHeight(); i++) {
            if (!inEmptyHorizontalRange && horizontalCounts[i] <= maxEmptyRowCount) {
                inEmptyHorizontalRange = true;
                emptyHorizontalRangeStart = i;
            } else if (inEmptyHorizontalRange && horizontalCounts[i] > maxEmptyRowCount) {
                inEmptyHorizontalRange = false;
                emptyHorizontalRanges.add(new int[] { emptyHorizontalRangeStart, i });
            }
        }
        // a run still open at the bottom of the image ends on the last row
        if (inEmptyHorizontalRange) {
            emptyHorizontalRanges.add(new int[] { emptyHorizontalRangeStart, this.getHeight() - 1 });
        }

        LOG.debug("rowXHeight mean: " + rowXHeightStats.getMean());
        LOG.debug("rowXHeight median: " + rowXHeightStats.getPercentile(50));
        // an empty run taller than two mean x-heights is a "big" break, anything else a "small" one
        double minHorizontalBreak = rowXHeightStats.getMean() * 2.0;
        LOG.debug("minHorizontalBreak: " + minHorizontalBreak);
        int smallBreakCount = 0;
        int mainTextTop = 0;
        int bigBreakCount = 0;
        // scan the empty runs from the top: while fewer than two big and fewer than two
        // small breaks have been seen, every big break moves mainTextTop to its end
        for (int[] emptyHorizontalRange : emptyHorizontalRanges) {
            int height = emptyHorizontalRange[1] - emptyHorizontalRange[0];
            LOG.trace("empty range: " + emptyHorizontalRange[0] + ", " + emptyHorizontalRange[1] + " = "
                    + height);
            if (bigBreakCount < 2 && smallBreakCount < 2 && height > minHorizontalBreak) {
                mainTextTop = emptyHorizontalRange[1];
                bigBreakCount++;
            }
            if (height <= minHorizontalBreak)
                smallBreakCount++;
        }

        LOG.debug("mainTextTop:" + mainTextTop);
        // lift mainTextTop upwards by at most half the mean x-height, or until we reach a zero row
        int minTop = mainTextTop - (int) (rowXHeightStats.getMean() / 2.0);
        if (minTop < 0)
            minTop = 0;
        for (int i = mainTextTop; i > minTop; i--) {
            mainTextTop = i;
            if (horizontalCounts[i] == 0) {
                break;
            }
        }
        LOG.debug("mainTextTop (adjusted):" + mainTextTop);

        // same idea from the bottom: scan the empty runs last-to-first, letting each big
        // break move mainTextBottom to its start, and stop once more than two breaks
        // (big and small together) have been seen
        smallBreakCount = 0;
        bigBreakCount = 0;
        int mainTextBottom = this.getHeight();
        for (int i = emptyHorizontalRanges.size() - 1; i >= 0; i--) {
            int[] emptyHorizontalRange = emptyHorizontalRanges.get(i);
            int height = emptyHorizontalRange[1] - emptyHorizontalRange[0];
            LOG.trace("emptyHorizontalRange: " + emptyHorizontalRange[0] + ", height: " + height
                    + ", bigBreakCount: " + bigBreakCount + ", smallBreakCount: " + smallBreakCount);
            if ((bigBreakCount + smallBreakCount) <= 2 && height > minHorizontalBreak) {
                mainTextBottom = emptyHorizontalRange[0];
                LOG.trace("Set mainTextBottom to " + mainTextBottom);
                bigBreakCount++;
            }
            if (height <= minHorizontalBreak)
                smallBreakCount++;
            if ((bigBreakCount + smallBreakCount) > 2)
                break;
        }
        LOG.debug("mainTextBottom:" + mainTextBottom);
        // lower mainTextBottom downwards by at most half the mean x-height, or until we reach a zero row
        int maxBottom = mainTextBottom + (int) (rowXHeightStats.getMean() / 2.0);
        if (maxBottom > this.getHeight())
            maxBottom = this.getHeight();
        for (int i = mainTextBottom; i < maxBottom; i++) {
            mainTextBottom = i;
            if (horizontalCounts[i] == 0) {
                break;
            }
        }
        LOG.debug("mainTextBottom (adjusted):" + mainTextBottom);

        // verticalCounts[x] = summed height of the main-text shapes covering column x
        int[] verticalCounts = new int[this.getWidth()];
        // now get the fill factor for each vertical column, using only the shapes whose
        // slope-adjusted top lies between mainTextTop and mainTextBottom
        for (RowOfShapes row : this.getRows()) {
            for (Shape shape : row.getShapes()) {
                int slopeAdjustedLeft = (int) Math.round(shape.getLeft() - row.getXAdjustment());
                double shapeMidPointX = (double) (shape.getLeft() + shape.getRight()) / 2.0;
                int slopeAdjustedTop = (int) Math
                        .round(shape.getTop() + (slope * (shapeMidPointX - imageMidPointX)));
                if (slopeAdjustedTop >= mainTextTop && slopeAdjustedTop <= mainTextBottom
                        && slopeAdjustedLeft >= 0 && slopeAdjustedLeft < this.getWidth()) {
                    for (int i = 0; i < shape.getWidth(); i++) {
                        // columns that would fall right of the image are ignored
                        if (slopeAdjustedLeft + i < this.getWidth())
                            verticalCounts[slopeAdjustedLeft + i] += shape.getHeight();
                    }
                }
            }
        }

        // statistics over all columns, and over the columns containing at least one shape pixel
        DescriptiveStatistics verticalStats = new DescriptiveStatistics();
        DescriptiveStatistics verticalStatsNonEmpty = new DescriptiveStatistics();
        for (int i = 0; i < this.getWidth(); i++) {
            //         LOG.trace("Column " + i + ": " + verticalCounts[i]);
            verticalStats.addValue(verticalCounts[i]);
            if (verticalCounts[i] > 0)
                verticalStatsNonEmpty.addValue(verticalCounts[i]);
        }
        LOG.debug("Mean vertical count: " + verticalStats.getMean());
        LOG.debug("Median vertical count: " + verticalStats.getPercentile(50));
        LOG.debug("25 percentile vertical count: " + verticalStats.getPercentile(25));
        LOG.debug("Mean vertical count (non empty): " + verticalStatsNonEmpty.getMean());
        LOG.debug("Median vertical count (non empty): " + verticalStatsNonEmpty.getPercentile(50));
        LOG.debug("25 percentile vertical count (non empty): " + verticalStatsNonEmpty.getPercentile(25));
        LOG.debug("10 percentile vertical count (non empty): " + verticalStatsNonEmpty.getPercentile(10));
        LOG.debug("1 percentile vertical count (non empty): " + verticalStatsNonEmpty.getPercentile(1));

        // a column counts as "empty" when its fill is at most the 1st percentile of the
        // non-empty columns (the earlier mean / 8 rule is kept below for reference)
        //         double maxEmptyColumnCount = verticalStatsNonEmpty.getMean() / 8.0;
        double maxEmptyColumnCount = verticalStatsNonEmpty.getPercentile(1);
        LOG.debug("maxEmptyColumnCount: " + maxEmptyColumnCount);

        // collect the runs of consecutive empty columns as {start, end} pairs, where end
        // is the index of the first non-empty column following the run
        boolean inEmptyVerticalRange = false;
        List<int[]> emptyVerticalRanges = new ArrayList<int[]>();
        int emptyVerticalRangeStart = 0;
        for (int i = 0; i < this.getWidth(); i++) {
            if (!inEmptyVerticalRange && verticalCounts[i] <= maxEmptyColumnCount) {
                inEmptyVerticalRange = true;
                emptyVerticalRangeStart = i;
            } else if (inEmptyVerticalRange && verticalCounts[i] > maxEmptyColumnCount) {
                inEmptyVerticalRange = false;
                emptyVerticalRanges.add(new int[] { emptyVerticalRangeStart, i });
            }
        }
        // a run still open at the right edge of the image ends on the last column
        if (inEmptyVerticalRange) {
            emptyVerticalRanges.add(new int[] { emptyVerticalRangeStart, this.getWidth() - 1 });
        }

        LOG.debug("rowXHeight mean: " + rowXHeightStats.getMean());
        LOG.debug("rowXHeight median: " + rowXHeightStats.getPercentile(50));
        // an empty run must be at least one mean x-height wide to count as a column break
        double minVerticalBreak = rowXHeightStats.getMean() * 1.0;
        LOG.debug("minVerticalBreak: " + minVerticalBreak);

        List<int[]> columnBreaks = new ArrayList<int[]>();
        for (int[] emptyVerticalRange : emptyVerticalRanges) {
            int width = emptyVerticalRange[1] - emptyVerticalRange[0];
            LOG.trace("empty range: " + emptyVerticalRange[0] + ", " + emptyVerticalRange[1] + " = " + width);

            if (width >= minVerticalBreak) {
                columnBreaks.add(emptyVerticalRange);
                LOG.trace("Found column break!");
            }
        }

        columnSeparators = new ArrayList<Rectangle>();
        for (int[] columnBreak : columnBreaks) {
            // reduce the column break to the thickest empty area if possible:
            // re-scan the break with a threshold starting at 0 and growing in steps of
            // originalCount / 8, and keep the widest empty sub-run found at the first
            // threshold that yields any
            int[] bestColumnBreak = null;
            // NOTE(review): maxEmptyColumnCount is overwritten below and not restored, so
            // from the second column break onwards originalCount is whatever value the
            // previous iteration's while loop ended on, not the percentile computed
            // above - confirm this carry-over is intended.
            double originalCount = maxEmptyColumnCount;
            maxEmptyColumnCount = 0;
            while (bestColumnBreak == null && maxEmptyColumnCount <= originalCount) {
                inEmptyVerticalRange = false;
                emptyVerticalRanges = new ArrayList<int[]>();
                emptyVerticalRangeStart = columnBreak[0];
                for (int i = columnBreak[0]; i <= columnBreak[1]; i++) {
                    if (!inEmptyVerticalRange && verticalCounts[i] <= maxEmptyColumnCount) {
                        inEmptyVerticalRange = true;
                        emptyVerticalRangeStart = i;
                    } else if (inEmptyVerticalRange && verticalCounts[i] > maxEmptyColumnCount) {
                        inEmptyVerticalRange = false;
                        emptyVerticalRanges.add(new int[] { emptyVerticalRangeStart, i });
                    }
                }
                if (inEmptyVerticalRange) {
                    emptyVerticalRanges.add(new int[] { emptyVerticalRangeStart, columnBreak[1] });
                }

                // keep the widest sub-run; on ties the first one found wins
                for (int[] emptyVerticalRange : emptyVerticalRanges) {
                    if (bestColumnBreak == null || (emptyVerticalRange[1]
                            - emptyVerticalRange[0] > bestColumnBreak[1] - bestColumnBreak[0]))
                        bestColumnBreak = emptyVerticalRange;
                }
                maxEmptyColumnCount += (originalCount / 8.0);
            }

            // no narrower sub-run found: fall back to the full column break
            if (bestColumnBreak == null)
                bestColumnBreak = columnBreak;

            // the separator uses the break's horizontal extent and the main-text vertical limits
            Rectangle whiteArea = new WhiteArea(bestColumnBreak[0], mainTextTop, bestColumnBreak[1],
                    mainTextBottom);
            columnSeparators.add(whiteArea);
            LOG.debug("ColumnBreak: " + whiteArea);
        } // next column break
    }
    return columnSeparators;
}

From source file:guineu.modules.dataanalysis.foldChanges.FoldTestTask.java

/**
 * Computes the fold change of one dataset row between the first two groups
 * defined by the selected parameter.
 * <p>
 * Each sample (column) whose parameter value equals the first available value
 * contributes its peak to group one; each sample whose value equals the second
 * available value contributes to group two. Any exception raised while collecting
 * the values is printed to standard error and the values gathered so far are used.
 *
 * @param mol index of the dataset row to evaluate
 * @return {@code -1} when the parameter has fewer than two available values,
 *         {@code 0} when either group ended up without values, otherwise the
 *         mean of group one divided by the mean of group two
 */
public double Foldtest(int mol) throws IllegalArgumentException, MathException {
    DescriptiveStatistics firstGroup = new DescriptiveStatistics();
    DescriptiveStatistics secondGroup = new DescriptiveStatistics();

    try {
        // Determine groups for selected raw data files
        List<String> groupValues = dataset.getParameterAvailableValues(parameter);
        if (groupValues.size() <= 1) {
            return -1;
        }
        String firstValue = groupValues.get(0);
        String secondValue = groupValues.get(1);

        for (String sampleName : dataset.getAllColumnNames()) {
            Object sampleValue = dataset.getParametersValue(sampleName, parameter);
            if (sampleValue == null) {
                continue;
            }

            // the first group takes precedence when both values are equal
            DescriptiveStatistics target;
            if (sampleValue.equals(firstValue)) {
                target = firstGroup;
            } else if (sampleValue.equals(secondValue)) {
                target = secondGroup;
            } else {
                continue;
            }
            target.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    if (firstGroup.getN() <= 0 || secondGroup.getN() <= 0) {
        return 0;
    }
    // the fold change is the ratio of the group means (not of the medians)
    return firstGroup.getMean() / secondGroup.getMean();
}

From source file:guineu.modules.dataanalysis.Ttest.TTestTask.java

/**
 * Runs a two-sample t-test on one dataset row between the first two groups
 * defined by the selected parameter.
 * <p>
 * Samples are assigned to a group by comparing their parameter value with the
 * first and second available values. A peak that cannot be added (for any
 * exception) is skipped, and an exception during group collection as a whole is
 * ignored as well, so the test runs on whatever values were gathered.
 *
 * @param mol index of the dataset row to evaluate
 * @return {@code null} when the parameter has fewer than two available values,
 *         otherwise a three-element array: the t-test result, the mean of group
 *         one and the mean of group two
 */
public double[] Ttest(int mol) throws IllegalArgumentException, MathException {
    DescriptiveStatistics firstGroup = new DescriptiveStatistics();
    DescriptiveStatistics secondGroup = new DescriptiveStatistics();

    try {
        // Determine groups for selected raw data files
        List<String> groupValues = dataset.getParameterAvailableValues(parameter);
        if (groupValues.size() <= 1) {
            return null;
        }
        String firstValue = groupValues.get(0);
        String secondValue = groupValues.get(1);

        for (String sampleName : dataset.getAllColumnNames()) {
            Object sampleValue = dataset.getParametersValue(sampleName, parameter);
            if (sampleValue == null) {
                continue;
            }

            // the first group takes precedence when both values are equal
            DescriptiveStatistics target;
            if (sampleValue.equals(firstValue)) {
                target = firstGroup;
            } else if (sampleValue.equals(secondValue)) {
                target = secondGroup;
            } else {
                continue;
            }

            try {
                target.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
            } catch (Exception ignored) {
                // best effort: a sample whose peak cannot be read is left out
            }
        }
    } catch (Exception ignored) {
        // best effort: the test below runs on the values collected so far
    }

    double testResult = new TTestImpl().tTest((StatisticalSummary) firstGroup,
            (StatisticalSummary) secondGroup);
    return new double[] { testResult, firstGroup.getMean(), secondGroup.getMean() };
}

From source file:com.joliciel.talismane.other.corpus.CorpusStatistics.java

/**
 * Updates the corpus statistics with one parsed sentence.
 * <p>
 * Three passes are made over the sentence:
 * <ol>
 * <li>over the pos-tagged tokens (root excluded): word sets, unknown-word counts
 * against the reference sets when present, alphanumeric counts and per-tag counts;</li>
 * <li>over the dependency arcs: per-label counts, syntax depth and head-dependent
 * distance statistics;</li>
 * <li>over pairs of dependency arcs: detection of crossing (non-projective) arcs.</li>
 * </ol>
 *
 * @param parseConfiguration the parse of the sentence to add to the statistics
 * @param writer not used by this method
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration, Writer writer) {
    sentenceCount++;
    sentenceLengthStats.addValue(parseConfiguration.getPosTagSequence().size());

    for (PosTaggedToken posTaggedToken : parseConfiguration.getPosTagSequence()) {
        // the artificial root token is not part of the word statistics
        if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG))
            continue;

        Token token = posTaggedToken.getToken();

        String word = token.getOriginalText();
        words.add(word);
        if (referenceWords != null) {
            if (!referenceWords.contains(word))
                unknownTokenCount++;
        }
        if (alphanumeric.matcher(token.getOriginalText()).find()) {
            String lowercase = word.toLowerCase(TalismaneSession.getLocale());
            lowerCaseWords.add(lowercase);
            alphanumericCount++;
            if (referenceLowercaseWords != null) {
                if (!referenceLowercaseWords.contains(lowercase))
                    unknownAlphanumericCount++;
            }
        }

        tokenCount++;

        Integer countObj = posTagCounts.get(posTaggedToken.getTag().getCode());
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        posTagCounts.put(posTaggedToken.getTag().getCode(), count);
    }

    int maxDepth = 0;
    DescriptiveStatistics avgSyntaxDepthForSentenceStats = new DescriptiveStatistics();
    for (DependencyArc arc : parseConfiguration.getDependencies()) {
        Integer countObj = depLabelCounts.get(arc.getLabel());
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        depLabelCounts.put(arc.getLabel(), count);
        totalDepCount++;

        if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                && (arc.getLabel() == null || arc.getLabel().length() == 0)) {
            // do nothing for unattached stuff (e.g. punctuation)
        } else if ("ponct".equals(arc.getLabel())) {
            // do nothing for punctuation
            // (constant-first comparison: an arc with a null label whose head is not
            // the root no longer throws a NullPointerException here; it is treated
            // like any other non-punctuation arc)
        } else {
            // depth = number of governing arcs followed before reaching one headed by the root
            int depth = 0;
            DependencyArc theArc = arc;
            while (theArc != null && !theArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)) {
                theArc = parseConfiguration.getGoverningDependency(theArc.getHead());
                depth++;
            }
            if (depth > maxDepth)
                maxDepth = depth;

            syntaxDepthStats.addValue(depth);
            avgSyntaxDepthForSentenceStats.addValue(depth);

            int distance = Math
                    .abs(arc.getHead().getToken().getIndex() - arc.getDependent().getToken().getIndex());
            syntaxDistanceStats.addValue(distance);
        }

        // NOTE(review): these two updates sit inside the arc loop, so the running
        // maximum and the running mean are added once per arc rather than once per
        // sentence - confirm this is intended.
        maxSyntaxDepthStats.addValue(maxDepth);
        if (avgSyntaxDepthForSentenceStats.getN() > 0)
            avgSyntaxDepthStats.addValue(avgSyntaxDepthForSentenceStats.getMean());
    }

    // we cheat a little bit by only allowing each arc to count once
    // there could be a situation where there are two independent non-projective arcs
    // crossing the same mother arc, but we prefer here to underestimate,
    // as this phenomenon is quite rare.
    Set<DependencyArc> nonProjectiveArcs = new HashSet<DependencyArc>();
    int i = 0;
    for (DependencyArc arc : parseConfiguration.getDependencies()) {
        i++;
        if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                && (arc.getLabel() == null || arc.getLabel().length() == 0))
            continue;
        if (nonProjectiveArcs.contains(arc))
            continue;

        // token span covered by this arc
        int headIndex = arc.getHead().getToken().getIndex();
        int depIndex = arc.getDependent().getToken().getIndex();
        int startIndex = headIndex < depIndex ? headIndex : depIndex;
        int endIndex = headIndex >= depIndex ? headIndex : depIndex;
        int j = 0;
        for (DependencyArc otherArc : parseConfiguration.getDependencies()) {
            // only compare with arcs that come later in the iteration, so each pair is visited once
            j++;
            if (j <= i)
                continue;
            if (otherArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                    && (otherArc.getLabel() == null || otherArc.getLabel().length() == 0))
                continue;
            if (nonProjectiveArcs.contains(otherArc))
                continue;

            int headIndex2 = otherArc.getHead().getToken().getIndex();
            int depIndex2 = otherArc.getDependent().getToken().getIndex();
            int startIndex2 = headIndex2 < depIndex2 ? headIndex2 : depIndex2;
            int endIndex2 = headIndex2 >= depIndex2 ? headIndex2 : depIndex2;
            // the arcs cross when exactly one end of the other arc lies strictly inside this arc's span
            boolean nonProjective = false;
            if (startIndex2 < startIndex && endIndex2 > startIndex && endIndex2 < endIndex) {
                nonProjective = true;
            } else if (startIndex2 > startIndex && startIndex2 < endIndex && endIndex2 > endIndex) {
                nonProjective = true;
            }
            if (nonProjective) {
                nonProjectiveArcs.add(arc);
                nonProjectiveArcs.add(otherArc);
                nonProjectiveCount++;
                LOG.debug("Non-projective arcs in sentence: " + parseConfiguration.getSentence().getText());
                LOG.debug(arc.toString());
                LOG.debug(otherArc.toString());
                break;
            }
        }
    }
}

From source file:com.mozilla.socorro.hadoop.RawDumpSize.java

/**
 * Runs the job and, when it succeeds, prints size statistics for the raw and
 * processed dumps found in the job output.
 * <p>
 * Every non-directory file under the job's output path is read line by line.
 * Lines are split on tabs; the second field selects the category ("raw" or
 * "processed") and the third field is the size in bytes. Lines of any other
 * category are ignored.
 *
 * @param args command-line arguments; exactly one is expected
 * @return the result of {@code printUsage()} when the argument count is wrong,
 *         {@code 0} when the job succeeded, {@code -1} otherwise
 * @throws Exception if the job or reading its output fails, or if a size field
 *         is missing or is not a valid number
 */
public int run(String[] args) throws Exception {
    if (args.length != 1) {
        return printUsage();
    }

    int rc = -1;
    Job job = initJob(args);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        rc = 0;
        FileSystem hdfs = null;
        DescriptiveStatistics rawStats = new DescriptiveStatistics();
        long rawTotal = 0L;
        DescriptiveStatistics processedStats = new DescriptiveStatistics();
        long processedTotal = 0L;
        try {
            hdfs = FileSystem.get(job.getConfiguration());
            Pattern tabPattern = Pattern.compile("\t");
            for (FileStatus status : hdfs.listStatus(FileOutputFormat.getOutputPath(job))) {
                if (!status.isDir()) {
                    BufferedReader reader = null;
                    try {
                        // decode with an explicit charset instead of the platform default
                        // NOTE(review): assumes the job writes its output as UTF-8 text - confirm
                        reader = new BufferedReader(
                                new InputStreamReader(hdfs.open(status.getPath()), "UTF-8"));
                        String line = null;
                        while ((line = reader.readLine()) != null) {
                            String[] splits = tabPattern.split(line);
                            // parsed as long so that sizes above Integer.MAX_VALUE do not
                            // fail with a NumberFormatException; the totals are long already
                            long byteSize = Long.parseLong(splits[2]);
                            if ("raw".equals(splits[1])) {
                                rawStats.addValue(byteSize);
                                rawTotal += byteSize;
                            } else if ("processed".equals(splits[1])) {
                                processedStats.addValue(byteSize);
                                processedTotal += byteSize;
                            }
                        }
                    } finally {
                        if (reader != null) {
                            reader.close();
                        }
                    }
                }
            }
        } finally {
            if (hdfs != null) {
                hdfs.close();
            }
        }

        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " raw_data:dump =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", rawStats.getMin(),
                rawStats.getMax(), rawStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                rawStats.getPercentile(25.0d), rawStats.getPercentile(50.0d), rawStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + rawTotal);
        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " processed_data:json =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", processedStats.getMin(),
                processedStats.getMax(), processedStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                processedStats.getPercentile(25.0d), processedStats.getPercentile(50.0d),
                processedStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + processedTotal);
    }

    return rc;
}

From source file:edu.usc.goffish.gopher.sample.stats.N_Hop_Stats.java

@Override
public void compute(List<SubGraphMessage> messageList) {

    if (getIteration() == 0 && getSuperStep() == 0) {
        String data = new String(messageList.get(0).getData());

        //   debugLog("GOT DATA initial :" + data);
        hopCount = Integer.parseInt(data);

        try {/*from   w  w  w.j a va  2  s  .c o  m*/
            init();
        } catch (IOException e) {
            e.printStackTrace();
            throw new RuntimeException();
        }

    }
    long ls = System.currentTimeMillis();
    ISubgraphInstance instance = getCurrentInstance();

    if (instance == null) {
        //    debugLog("Instance == null : " + getIteration());
        voteToHalt();
        haultApp();
        return;
    }

    if (getSuperStep() == 0) {

        DescriptiveStatistics statistics = new DescriptiveStatistics();

        long diskTimeStart = System.currentTimeMillis();
        if (!instance.hasProperties()) {
            //    debugLog("No Properties : " + getIteration());
            voteToHalt();
            return;
        }

        debugLog("INSTANCE_LOAD," + subgraph.getId() + "," + (System.currentTimeMillis() - diskTimeStart) + ","
                + getSuperStep() + "," + getIteration());

        long travasalS = System.currentTimeMillis();
        // DescriptiveStatistics edgePropLoadTimeStats = new DescriptiveStatistics();
        for (ITemplateEdge edge : subgraph.edges()) {
            //   long edgePropStart = System.currentTimeMillis();
            ISubgraphObjectProperties edgeProps = instance.getPropertiesForEdge(edge.getId());
            //  edgePropLoadTimeStats.addValue(System.currentTimeMillis() - edgePropStart);

            String[] latencies = ((String) edgeProps.getValue(LATENCY_PROP)) == null ? null
                    : ((String) edgeProps.getValue(LATENCY_PROP)).split(",");
            String[] hops = ((String) edgeProps.getValue(HOP_PROP)) == null ? null
                    : ((String) edgeProps.getValue(HOP_PROP)).split(",");

            if (hops != null && latencies != null) {

                for (int i = 0; i < hops.length; i++) {
                    String h = hops[i];

                    if (hopCount == Integer.parseInt(h)) {
                        //              debugLog("HOP : " + h + ": Latency : " + latencies[i]);
                        double latency = Double.parseDouble(latencies[i]);
                        statistics.addValue(latency);
                    }
                }
            }

        }

        //debugLog("Travasal total : " + (System.currentTimeMillis() - travasalS));
        //debugLog("Edge Load Time max,avg:" + edgePropLoadTimeStats.getMax() + "," + edgePropLoadTimeStats.getMean());

        String data = "1:" + statistics.getMean();
        if (!"1:nan".equalsIgnoreCase(data)) {
            SubGraphMessage message = new SubGraphMessage(data.getBytes());
            sendMessage(partition.getId(), message);
            //debugLog("Sub-graph data sent : " + data);
        }
        voteToHalt();

    } else {

        if (acquireLock("N_HOP_" + partition.getId() + " _" + getIteration() + "_" + getSuperStep())) {
            //debugLog("Lock Acqured");
            DescriptiveStatistics statistics = new DescriptiveStatistics();
            boolean finalStage = false;
            for (SubGraphMessage msg : messageList) {

                String data = new String(msg.getData());
                //debugLog("Partittion got data : " + data);
                String[] parts = data.split(":");
                if ("1".equals(parts[0].trim())) {
                    if (!parts[1].equalsIgnoreCase("nan")) {
                        statistics.addValue(Double.parseDouble(parts[1]));
                        //debugLog("Stage 1 data added : " + parts[1]);
                    }
                } else {
                    finalStage = true;
                    if (!parts[1].equalsIgnoreCase("nan")) {
                        statistics.addValue(Double.parseDouble(parts[1]));
                        //debugLog("Stage 2 data added : " + parts[1]);
                    }
                }

            }

            if (finalStage) {
                try {

                    String data = "" + statistics.getMean();
                    try {
                        Double.parseDouble(data);
                        sendMessageToReduceStep(new SubGraphMessage(data.getBytes()));
                    } catch (Exception e) {

                    }

                    PrintWriter writer = new PrintWriter(new FileWriter("Hop_Stats.log", true));
                    log(writer, hopCount, statistics.getMean(), currentInstance.getTimestampStart());
                } catch (Exception e) {
                    e.printStackTrace();
                }
                voteToHalt();

            } else {

                String data = "2:" + statistics.getMean();

                if (!"2:nan".equalsIgnoreCase(data)) {
                    SubGraphMessage message = new SubGraphMessage(data.getBytes());
                    for (int i : partitions) {
                        sendMessage(i, message);
                    }
                    //debugLog("Stage 2 data sent :" + data);
                }
                voteToHalt();
            }
        } else {
            voteToHalt();
        }

    }

}

From source file:com.joliciel.talismane.stats.FScoreCalculator.java

/**
 * Combine the results of n cross validation results into a single f-score file.
 * @param directory
 * @param prefix
 * @param suffix
 * @param csvFileWriter
 */
static void combineCrossValidationResults(File directory, String prefix, String suffix, Writer csvFileWriter) {
    try {
        File[] files = directory.listFiles();
        Map<Integer, Map<String, FScoreStats>> fileStatsMap = new HashMap<Integer, Map<String, FScoreStats>>();
        for (File file : files) {
            if (file.getName().startsWith(prefix) && file.getName().endsWith(suffix)) {
                int index = Integer.parseInt(file.getName().substring(prefix.length(), prefix.length() + 1));
                Map<String, FScoreStats> statsMap = new HashMap<String, FScoreCalculator.FScoreStats>();
                fileStatsMap.put(index, statsMap);
                Scanner scanner = new Scanner(
                        new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")));

                boolean firstLine = true;
                int truePositivePos = -1;

                while (scanner.hasNextLine()) {
                    String line = scanner.nextLine();
                    List<String> cells = CSV.getCSVCells(line);
                    if (firstLine) {
                        int i = 0;
                        for (String cell : cells) {
                            if (cell.equals("true+")) {
                                truePositivePos = i;
                                break;
                            }
                            i++;
                        }
                        if (truePositivePos < 0) {
                            throw new JolicielException("Couldn't find true+ on first line");
                        }
                        firstLine = false;
                    } else {
                        FScoreStats stats = new FScoreStats();
                        String outcome = cells.get(0);
                        stats.outcome = outcome;
                        if (outcome.equals("AVERAGE"))
                            break;
                        stats.truePos = Integer.parseInt(cells.get(truePositivePos));
                        stats.falsePos = Integer.parseInt(cells.get(truePositivePos + 1));
                        stats.falseNeg = Integer.parseInt(cells.get(truePositivePos + 2));
                        stats.precision = Double.parseDouble(cells.get(truePositivePos + 3));
                        stats.recall = Double.parseDouble(cells.get(truePositivePos + 4));
                        stats.fScore = Double.parseDouble(cells.get(truePositivePos + 5));
                        statsMap.put(outcome, stats);
                    } // firstLine?
                } // has more lines
                scanner.close();
            } // file in current series
        } // next file

        int numFiles = fileStatsMap.size();
        if (numFiles == 0) {
            throw new JolicielException("No files found matching prefix and suffix provided");
        }
        Map<String, DescriptiveStatistics> descriptiveStatsMap = new HashMap<String, DescriptiveStatistics>();
        Map<String, FScoreStats> outcomeStats = new HashMap<String, FScoreCalculator.FScoreStats>();
        Set<String> outcomes = new TreeSet<String>();
        for (Map<String, FScoreStats> statsMap : fileStatsMap.values()) {
            for (FScoreStats stats : statsMap.values()) {
                DescriptiveStatistics fScoreStats = descriptiveStatsMap.get(stats.outcome + "fScore");
                if (fScoreStats == null) {
                    fScoreStats = new DescriptiveStatistics();
                    descriptiveStatsMap.put(stats.outcome + "fScore", fScoreStats);
                }
                fScoreStats.addValue(stats.fScore);
                DescriptiveStatistics precisionStats = descriptiveStatsMap.get(stats.outcome + "precision");
                if (precisionStats == null) {
                    precisionStats = new DescriptiveStatistics();
                    descriptiveStatsMap.put(stats.outcome + "precision", precisionStats);
                }
                precisionStats.addValue(stats.precision);
                DescriptiveStatistics recallStats = descriptiveStatsMap.get(stats.outcome + "recall");
                if (recallStats == null) {
                    recallStats = new DescriptiveStatistics();
                    descriptiveStatsMap.put(stats.outcome + "recall", recallStats);
                }
                recallStats.addValue(stats.recall);

                FScoreStats outcomeStat = outcomeStats.get(stats.outcome);
                if (outcomeStat == null) {
                    outcomeStat = new FScoreStats();
                    outcomeStat.outcome = stats.outcome;
                    outcomeStats.put(stats.outcome, outcomeStat);
                }
                outcomeStat.truePos += stats.truePos;
                outcomeStat.falsePos += stats.falsePos;
                outcomeStat.falseNeg += stats.falseNeg;

                outcomes.add(stats.outcome);
            }
        }

        csvFileWriter.write(CSV.format(prefix + suffix));
        csvFileWriter.write("\n");
        csvFileWriter.write(CSV.format("outcome"));
        csvFileWriter.write(CSV.format("true+") + CSV.format("false+") + CSV.format("false-")
                + CSV.format("tot precision") + CSV.format("avg precision") + CSV.format("dev precision")
                + CSV.format("tot recall") + CSV.format("avg recall") + CSV.format("dev recall")
                + CSV.format("tot f-score") + CSV.format("avg f-score") + CSV.format("dev f-score") + "\n");

        for (String outcome : outcomes) {
            csvFileWriter.write(CSV.format(outcome));
            FScoreStats outcomeStat = outcomeStats.get(outcome);
            DescriptiveStatistics fScoreStats = descriptiveStatsMap.get(outcome + "fScore");
            DescriptiveStatistics precisionStats = descriptiveStatsMap.get(outcome + "precision");
            DescriptiveStatistics recallStats = descriptiveStatsMap.get(outcome + "recall");
            outcomeStat.calculate();
            csvFileWriter.write(CSV.format(outcomeStat.truePos));
            csvFileWriter.write(CSV.format(outcomeStat.falsePos));
            csvFileWriter.write(CSV.format(outcomeStat.falseNeg));
            csvFileWriter.write(CSV.format(outcomeStat.precision * 100));
            csvFileWriter.write(CSV.format(precisionStats.getMean()));
            csvFileWriter.write(CSV.format(precisionStats.getStandardDeviation()));
            csvFileWriter.write(CSV.format(outcomeStat.recall * 100));
            csvFileWriter.write(CSV.format(recallStats.getMean()));
            csvFileWriter.write(CSV.format(recallStats.getStandardDeviation()));
            csvFileWriter.write(CSV.format(outcomeStat.fScore * 100));
            csvFileWriter.write(CSV.format(fScoreStats.getMean()));
            csvFileWriter.write(CSV.format(fScoreStats.getStandardDeviation()));
            csvFileWriter.write("\n");
            csvFileWriter.flush();
        }
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}

From source file:com.joliciel.jochre.lexicon.LexiconErrorWriter.java

/**
 * Merges the per-run cross-validation CSV files found in a directory.
 * <p>
 * Every {@code *.csv} file in {@code evalDir} is expected to be named
 * {@code prefix + <index char> + <separator char> + suffix + "_" + fileType + ".csv"}. For each
 * file type one merged file {@code prefix + "A_" + suffix + "_" + fileType + ".csv"} is written
 * to the same directory, using the suffix of the first file seen for that type.
 * <ul>
 * <li>{@code KEMatrix} files are aggregated: counts are summed per group into
 * {@code ErrorStatistics}, and the values of rows ending in {@code %} are collected so that
 * their mean and standard deviation over the runs can be written.</li>
 * <li>All other file types are concatenated, keeping the header line of the first file only.</li>
 * </ul>
 *
 * @param evalDir the directory containing the per-run files; merged files are written here too
 * @param prefix the file name prefix preceding the run index
 * @throws RuntimeException wrapping any {@link IOException} raised while reading or writing
 */
static void mergeCrossValidation(File evalDir, String prefix) {
    // Output writers by file type; declared outside the try so that all of them get closed.
    Map<String, Writer> writers = new HashMap<String, Writer>();
    try {
        File[] files = evalDir.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.endsWith(".csv");
            }
        });
        if (files == null) {
            // listFiles returns null when evalDir is not a directory or cannot be read
            throw new IOException("Could not list files in " + evalDir);
        }
        List<String> groupNames = new ArrayList<String>();
        Map<String, ErrorStatistics> errorMap = new LinkedHashMap<String, ErrorStatistics>();
        Map<String, Map<String, DescriptiveStatistics>> statMap = new HashMap<String, Map<String, DescriptiveStatistics>>();
        for (File file : files) {
            String filename = file.getName();
            LOG.debug("Processing " + filename);
            // NOTE(review): a merged file left over from a previous run (prefix + "A_...")
            // also ends in .csv and would fail this parse - confirm callers clean evalDir first.
            int index = Integer.parseInt(filename.substring(prefix.length(), prefix.length() + 1));
            String suffix = filename.substring(prefix.length() + 2, filename.lastIndexOf('_'));
            String fileType = filename.substring(filename.lastIndexOf('_') + 1, filename.lastIndexOf('.'));
            LOG.debug("index: " + index);
            LOG.debug("suffix: " + suffix);
            LOG.debug("fileType: " + fileType);
            Writer writer = writers.get(fileType);
            boolean firstFile = false;
            if (writer == null) {
                writer = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(
                                new File(evalDir, prefix + "A_" + suffix + "_" + fileType + ".csv"), false),
                        "UTF8"));
                writers.put(fileType, writer);
                firstFile = true;
            }
            if (fileType.equals("KEMatrix")) {
                // read as UTF-8 to match the encoding the merged files are written in
                Scanner scanner = new Scanner(file, "UTF-8");
                try {
                    int i = 0;
                    List<String> myGroupNames = new ArrayList<String>();
                    Map<String, Boolean> haveCountMap = new HashMap<String, Boolean>();
                    while (scanner.hasNextLine()) {
                        String line = scanner.nextLine();
                        List<String> cells = CSV.getCSVCells(line);
                        if (i == 0) {
                            // first row: one group name every five cells
                            for (int j = 0; j < cells.size(); j += 5) {
                                String groupName = cells.get(j);
                                if (!errorMap.containsKey(groupName)) {
                                    errorMap.put(groupName, new ErrorStatistics());
                                    statMap.put(groupName, new HashMap<String, DescriptiveStatistics>());
                                    groupNames.add(groupName);
                                }
                                myGroupNames.add(groupName);
                            }
                        } else if (i == 1) {
                            // second row is skipped
                        } else {
                            String rowName = cells.get(0);
                            int j = 0;
                            for (String groupName : myGroupNames) {
                                ErrorStatistics errorStats = errorMap.get(groupName);
                                Map<String, DescriptiveStatistics> stats = statMap.get(groupName);
                                // each group occupies five cells: offsets 1-3 hold correct/error/total
                                double correctCount = Double.parseDouble(cells.get(j * 5 + 1));
                                double errorCount = Double.parseDouble(cells.get(j * 5 + 2));
                                double totalCount = Double.parseDouble(cells.get(j * 5 + 3));
                                Boolean haveCount = haveCountMap.get(groupName);

                                if (rowName.equals("known")) {
                                    errorStats.knownWordCorrectCount += correctCount;
                                    errorStats.knownWordErrorCount += errorCount;
                                } else if (rowName.equals("unknown")) {
                                    errorStats.unknownWordCorrectCount += correctCount;
                                    errorStats.unknownWordErrorCount += errorCount;
                                } else if (rowName.equals("goodSeg")) {
                                    errorStats.goodSegCorrectCount += correctCount;
                                    errorStats.goodSegErrorCount += errorCount;
                                } else if (rowName.equals("badSeg")) {
                                    errorStats.badSegCorrectCount += correctCount;
                                    errorStats.badSegErrorCount += errorCount;
                                } else if (rowName.equals("knownLetters")) {
                                    errorStats.knownWordCorrectLetterCount += correctCount;
                                    errorStats.knownWordErrorLetterCount += errorCount;
                                } else if (rowName.equals("unknownLetters")) {
                                    errorStats.unknownWordCorrectLetterCount += correctCount;
                                    errorStats.unknownWordErrorLetterCount += errorCount;
                                } else if (rowName.equals("goodSegLetters")) {
                                    errorStats.goodSegCorrectLetterCount += correctCount;
                                    errorStats.goodSegErrorLetterCount += errorCount;
                                } else if (rowName.equals("badSegLetters")) {
                                    errorStats.badSegCorrectLetterCount += correctCount;
                                    errorStats.badSegErrorLetterCount += errorCount;
                                } else if (rowName.equals("inBeam")) {
                                    errorStats.answerInBeamCorrectCount += correctCount;
                                    errorStats.answerInBeamErrorCount += errorCount;
                                } else if (rowName.equals("total")) {
                                    // remember whether this group had any data in this file
                                    haveCountMap.put(groupName, totalCount > 0);
                                } else if (rowName.endsWith("%")) {
                                    // haveCount is null when a "%" row precedes the "total" row;
                                    // treat that like "no count" instead of failing on unboxing
                                    if (Boolean.TRUE.equals(haveCount)) {
                                        String keyPrefix = rowName.substring(0, rowName.length() - 1);
                                        getOrCreateStat(stats, keyPrefix + "|correct").addValue(correctCount);
                                        getOrCreateStat(stats, keyPrefix + "|error").addValue(errorCount);
                                        getOrCreateStat(stats, keyPrefix + "|total").addValue(totalCount);
                                    }
                                }

                                j++;
                            }
                        }
                        i++;
                    }
                } finally {
                    scanner.close();
                }
            } else {
                // plain concatenation: keep the header line of the first file of this type only
                Scanner scanner = new Scanner(file, "UTF-8");
                try {
                    boolean firstLine = true;
                    while (scanner.hasNextLine()) {
                        String line = scanner.nextLine();
                        if (firstLine) {
                            if (firstFile)
                                writer.write(line + "\n");
                            firstLine = false;
                        } else {
                            writer.write(line + "\n");
                        }
                        writer.flush();
                    }
                } finally {
                    scanner.close();
                }
            } // file type
        } // next file

        Writer statsWriter = writers.get("KEMatrix");
        if (statsWriter == null) {
            // no KEMatrix file was found, so there are no aggregate statistics to write
            LOG.debug("No KEMatrix files found in " + evalDir);
        } else {
            writeStats(statsWriter, errorMap);
            statsWriter.write("\n");
            String[] statTypes = new String[] { "known", "unknown", "goodSeg", "badSeg", "inBeam", "total",
                    "knownLetter", "unknownLetter", "goodSegLetter", "badSegLetter", "totalLetter" };
            for (String statType : statTypes) {
                // one line of averages, then one line of standard deviations, per stat type
                for (String groupName : groupNames) {
                    Map<String, DescriptiveStatistics> statsMap = statMap.get(groupName);
                    DescriptiveStatistics correctStat = getStatOrEmpty(statsMap, statType + "|correct");
                    DescriptiveStatistics errorStat = getStatOrEmpty(statsMap, statType + "|error");
                    DescriptiveStatistics totalStat = getStatOrEmpty(statsMap, statType + "|total");

                    statsWriter.write(CSV.format(statType + "%Avg") + CSV.format(correctStat.getMean())
                            + CSV.format(errorStat.getMean()) + CSV.format(totalStat.getMean())
                            + CSV.getCsvSeparator());

                } // next group
                statsWriter.write("\n");
                for (String groupName : groupNames) {
                    Map<String, DescriptiveStatistics> statsMap = statMap.get(groupName);
                    DescriptiveStatistics correctStat = getStatOrEmpty(statsMap, statType + "|correct");
                    DescriptiveStatistics errorStat = getStatOrEmpty(statsMap, statType + "|error");
                    DescriptiveStatistics totalStat = getStatOrEmpty(statsMap, statType + "|total");

                    statsWriter.write(CSV.format(statType + "%Dev") + CSV.format(correctStat.getStandardDeviation())
                            + CSV.format(errorStat.getStandardDeviation())
                            + CSV.format(totalStat.getStandardDeviation()) + CSV.getCsvSeparator());

                } // next group
                statsWriter.write("\n");
                statsWriter.flush();
            }
        }

    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    } finally {
        // close every merged file, including on failure; content has already been flushed
        for (Writer writer : writers.values()) {
            try {
                writer.close();
            } catch (IOException e) {
                LogUtils.logError(LOG, e);
            }
        }
    }
}

/** Returns the statistics stored under {@code key}, creating and registering them if absent. */
private static DescriptiveStatistics getOrCreateStat(Map<String, DescriptiveStatistics> stats, String key) {
    DescriptiveStatistics stat = stats.get(key);
    if (stat == null) {
        stat = new DescriptiveStatistics();
        stats.put(key, stat);
    }
    return stat;
}

/**
 * Returns the statistics stored under {@code key}, or an empty instance (whose mean and
 * standard deviation are NaN) when no value was ever collected for that key.
 */
private static DescriptiveStatistics getStatOrEmpty(Map<String, DescriptiveStatistics> stats, String key) {
    DescriptiveStatistics stat = stats.get(key);
    return stat != null ? stat : new DescriptiveStatistics();
}

From source file:com.joliciel.jochre.graphics.SegmenterImpl.java

/**
 * Clear out anything found in the right & left margins
 * @param sourceImage/*from w ww. jav a  2s . co  m*/
 */
void cleanMargins(SourceImage sourceImage) {
    LOG.debug("########## cleanMargins #########");

    int minCardinalityForMargin = 8;
    double averageShapeWidth = sourceImage.getAverageShapeWidth();

    LOG.debug("Finding right margin");
    double rightLimit = (double) sourceImage.getWidth() * 0.67;

    // first, create a DBScan cluster of all rows near the right-hand side
    List<RowOfShapes> rightHandRows = new ArrayList<RowOfShapes>();
    List<double[]> rightCoordinates = new ArrayList<double[]>();

    for (RowOfShapes row : sourceImage.getRows()) {
        double right = row.getRight();
        if (right >= rightLimit) {
            LOG.trace(row.toString());
            LOG.trace(
                    "Right: " + right + " + " + row.getXAdjustment() + " = " + (right - row.getXAdjustment()));
            right -= row.getXAdjustment();
            rightHandRows.add(row);
            rightCoordinates.add(new double[] { right });
        }
    }

    DBSCANClusterer<RowOfShapes> rightMarginClusterer = new DBSCANClusterer<RowOfShapes>(rightHandRows,
            rightCoordinates);
    Set<Set<RowOfShapes>> rowClusters = rightMarginClusterer.cluster(averageShapeWidth, minCardinalityForMargin,
            true);

    TreeSet<Set<RowOfShapes>> orderedRowClusters = new TreeSet<Set<RowOfShapes>>(
            new CardinalityComparator<RowOfShapes>());
    orderedRowClusters.addAll(rowClusters);

    int i = 0;

    // find the right-most cluster with sufficient cardinality, and assume it's the right margin
    DescriptiveStatistics rightMarginStats = null;
    for (Set<RowOfShapes> cluster : orderedRowClusters) {
        DescriptiveStatistics rightStats = new DescriptiveStatistics();
        for (RowOfShapes row : cluster)
            rightStats.addValue(row.getRight() - row.getXAdjustment());

        LOG.debug("Cluster " + i + ". Cardinality=" + cluster.size());
        LOG.debug("Right mean : " + rightStats.getMean());
        LOG.debug("Right std dev: " + rightStats.getStandardDeviation());

        if (cluster.size() >= minCardinalityForMargin
                && (rightMarginStats == null || rightMarginStats.getMean() < rightStats.getMean())) {
            rightMarginStats = rightStats;
        }
        i++;
    }

    // see how many rows would violate this margin - if too many, assume no margin
    // these rows are only rows which extend across the margin
    if (rightMarginStats != null) {
        LOG.debug("Right margin mean : " + rightMarginStats.getMean());
        LOG.debug("Right margin std dev: " + rightMarginStats.getStandardDeviation());

        double rightMarginLimit = rightMarginStats.getMean() + sourceImage.getAverageShapeWidth();
        LOG.debug("rightMarginLimit: " + rightMarginLimit);
        int numRowsToChop = 0;
        for (RowOfShapes row : sourceImage.getRows()) {
            if (row.getRight() >= rightLimit) {
                if (row.getRight() - row.getXAdjustment() >= rightMarginLimit
                        && row.getLeft() - row.getXAdjustment() <= rightMarginLimit) {
                    LOG.debug("Found overlapping row : " + row);
                    LOG.debug("Adjusted right : " + (row.getRight() - row.getXAdjustment()));
                    numRowsToChop++;
                }
            }
        }
        if (numRowsToChop >= 3) {
            LOG.debug("Too many overlapping rows - ignoring margin");
            rightMarginStats = null;
        }
    }

    if (rightMarginStats != null) {
        double rightMarginLimit = rightMarginStats.getMean() + sourceImage.getAverageShapeWidth();
        List<RowOfShapes> rowsToRemove = new ArrayList<RowOfShapes>();
        for (RowOfShapes row : sourceImage.getRows()) {
            double right = row.getRight() - row.getXAdjustment();
            LOG.trace(row.toString());
            LOG.trace("Adjusted right: " + right);

            if (right >= rightMarginLimit) {
                LOG.trace("Has out-of-margin stuff!");
                // need to chop off groups to the right of this threshold
                List<GroupOfShapes> groupsToChop = new ArrayList<GroupOfShapes>();
                for (GroupOfShapes group : row.getGroups()) {
                    if (group.getLeft() - row.getXAdjustment() > rightMarginLimit) {
                        groupsToChop.add(group);
                        LOG.debug("Chopping group outside of right margin: " + group);
                    }
                }
                for (GroupOfShapes group : groupsToChop) {
                    row.getShapes().removeAll(group.getShapes());
                }
                row.getGroups().removeAll(groupsToChop);

                if (row.getGroups().size() == 0) {
                    LOG.debug("Removing empty " + row);
                    rowsToRemove.add(row);
                } else {
                    row.recalculate();
                    row.assignGuideLines();
                }
            } // does this row extend beyond the margin?
        } // next row
        sourceImage.getRows().removeAll(rowsToRemove);
    } // have a right margin

    LOG.debug("Finding left margin");
    double leftLimit = (double) sourceImage.getWidth() * 0.33;

    // first, create a DBScan cluster of all rows near the left-hand side
    List<RowOfShapes> leftHandRows = new ArrayList<RowOfShapes>();
    List<double[]> leftCoordinates = new ArrayList<double[]>();

    for (RowOfShapes row : sourceImage.getRows()) {
        double left = row.getLeft();
        if (left <= leftLimit) {
            LOG.trace(row.toString());
            LOG.trace("Left: " + left + " - " + row.getXAdjustment() + " = " + (left - row.getXAdjustment()));
            left -= row.getXAdjustment();
            leftHandRows.add(row);
            leftCoordinates.add(new double[] { left });
        }
    }

    DBSCANClusterer<RowOfShapes> leftMarginClusterer = new DBSCANClusterer<RowOfShapes>(leftHandRows,
            leftCoordinates);
    Set<Set<RowOfShapes>> rowClustersLeft = leftMarginClusterer.cluster(averageShapeWidth,
            minCardinalityForMargin, true);

    TreeSet<Set<RowOfShapes>> orderedRowClustersLeft = new TreeSet<Set<RowOfShapes>>(
            new CardinalityComparator<RowOfShapes>());
    orderedRowClustersLeft.addAll(rowClustersLeft);

    i = 0;

    // find the left-most cluster with sufficient cardinality, and assume it's the left margin
    DescriptiveStatistics leftMarginStats = null;
    for (Set<RowOfShapes> cluster : orderedRowClustersLeft) {
        DescriptiveStatistics leftStats = new DescriptiveStatistics();
        for (RowOfShapes row : cluster)
            leftStats.addValue(row.getLeft() - row.getXAdjustment());

        LOG.debug("Cluster " + i + ". Cardinality=" + cluster.size());
        LOG.debug("Left mean : " + leftStats.getMean());
        LOG.debug("Left std dev: " + leftStats.getStandardDeviation());

        if (cluster.size() >= minCardinalityForMargin
                && (leftMarginStats == null || leftMarginStats.getMean() > leftStats.getMean())) {
            leftMarginStats = leftStats;
        }
        i++;
    }

    // see how many rows would violate this margin - if too many, assume no margin
    // these rows are only rows which extend across the margin
    if (leftMarginStats != null) {
        LOG.debug("Left margin mean : " + leftMarginStats.getMean());
        LOG.debug("Left margin std dev: " + leftMarginStats.getStandardDeviation());

        double leftMarginLimit = leftMarginStats.getMean() - sourceImage.getAverageShapeWidth();
        LOG.debug("leftMarginLimit: " + leftMarginLimit);
        int numRowsToChop = 0;
        for (RowOfShapes row : sourceImage.getRows()) {
            if (row.getLeft() <= leftLimit) {
                if (row.getLeft() - row.getXAdjustment() <= leftMarginLimit
                        && row.getRight() - row.getXAdjustment() >= leftMarginLimit) {
                    LOG.debug("Found overlapping row : " + row);
                    LOG.debug("Adjusted left : " + (row.getLeft() - row.getXAdjustment()));
                    numRowsToChop++;
                }
            }
        }
        if (numRowsToChop >= 3) {
            LOG.debug("Too many overlapping rows - ignoring margin");
            leftMarginStats = null;
        }
    }

    if (leftMarginStats != null) {
        double leftMarginLimit = leftMarginStats.getMean() - sourceImage.getAverageShapeWidth();
        List<RowOfShapes> rowsToRemove = new ArrayList<RowOfShapes>();
        for (RowOfShapes row : sourceImage.getRows()) {
            double left = row.getLeft() - row.getXAdjustment();
            LOG.trace(row.toString());
            LOG.trace("Adjusted left: " + left);

            if (left <= leftMarginLimit) {
                LOG.trace("Has out-of-margin stuff!");
                // need to chop off groups to the left of this threshold
                List<GroupOfShapes> groupsToChop = new ArrayList<GroupOfShapes>();
                for (GroupOfShapes group : row.getGroups()) {
                    if (group.getRight() - row.getXAdjustment() < leftMarginLimit) {
                        groupsToChop.add(group);
                        LOG.debug("Chopping group outside of left margin: " + group);
                    }
                }
                for (GroupOfShapes group : groupsToChop) {
                    row.getShapes().removeAll(group.getShapes());
                }
                row.getGroups().removeAll(groupsToChop);

                if (row.getGroups().size() == 0) {
                    LOG.debug("Removing empty " + row);
                    rowsToRemove.add(row);
                } else {
                    row.recalculate();
                    row.assignGuideLines();
                }
            } // does this row extend beyond the margin?
        } // next row
        sourceImage.getRows().removeAll(rowsToRemove);
    } // have a left margin
}

From source file:guineu.modules.dataanalysis.wilcoxontest.WilcoxonTestTask.java

/**
 * Compares two groups of peak values of one dataset row using R's {@code wilcox.test}.
 * <p>
 * When no parameter is selected, the groups are the samples listed in the {@code group1} and
 * {@code group2} fields. Otherwise the groups are the samples whose value for the selected
 * parameter equals the first, respectively second, available parameter value. Samples whose
 * peak cannot be read are skipped.
 * <p>
 * On success the task status is set to {@code FINISHED}; if the R evaluation fails the error
 * is logged, the status is set to {@code ERROR} and the p-value slot is left at 0.
 *
 * @param mol index of the dataset row to test
 * @return an array holding the p-value, the mean of the first group and the mean of the second
 *         group, in that order; or {@code null} when a parameter is selected and it has fewer
 *         than two available values
 * @throws IllegalArgumentException declared for callers; not thrown directly by this method
 */
public double[] Ttest(int mol) throws IllegalArgumentException {
    final Logger logger = Logger.getLogger(WilcoxonTestTask.class.getName());
    DescriptiveStatistics stats1 = new DescriptiveStatistics();
    DescriptiveStatistics stats2 = new DescriptiveStatistics();
    double[] values = new double[3];

    if (parameter == null) {
        for (int i = 0; i < group1.length; i++) {
            try {
                stats1.addValue((Double) this.dataset.getRow(mol).getPeak(group1[i]));
            } catch (Exception e) {
                // best effort: skip this sample, but keep a trace of the failure
                logger.log(Level.WARNING, "Skipping group 1 sample " + group1[i] + " of row " + mol, e);
            }
        }
        for (int i = 0; i < group2.length; i++) {
            try {
                stats2.addValue((Double) this.dataset.getRow(mol).getPeak(group2[i]));
            } catch (Exception e) {
                logger.log(Level.WARNING, "Skipping group 2 sample " + group2[i] + " of row " + mol, e);
            }
        }
    } else {
        try {
            // Determine groups for selected raw data files
            List<String> availableParameterValues = dataset.getParameterAvailableValues(parameter);

            if (availableParameterValues.size() <= 1) {
                // two groups are needed for the comparison
                return null;
            }
            String parameter1 = availableParameterValues.get(0);
            String parameter2 = availableParameterValues.get(1);

            for (String sampleName : dataset.getAllColumnNames()) {
                // look the sample's parameter value up once instead of once per comparison
                Object sampleValue = dataset.getParametersValue(sampleName, parameter);
                if (sampleValue == null) {
                    continue;
                }
                if (sampleValue.equals(parameter1)) {
                    try {
                        stats1.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
                    } catch (Exception e) {
                        // previously swallowed silently; logged at a low level to avoid noise
                        logger.log(Level.FINE, "Skipping sample " + sampleName + " of row " + mol, e);
                    }
                } else if (sampleValue.equals(parameter2)) {
                    try {
                        stats2.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
                    } catch (Exception e) {
                        logger.log(Level.FINE, "Skipping sample " + sampleName + " of row " + mol, e);
                    }
                }
            }
        } catch (Exception e) {
            // the test still runs on whatever values were collected before the failure
            logger.log(Level.WARNING, "Could not determine sample groups for row " + mol, e);
        }
    }
    try {
        final Rengine rEngine;
        try {
            rEngine = RUtilities.getREngine();
        } catch (Throwable t) {
            // keep the original failure as the cause so that it shows up in the log below
            throw new IllegalStateException(
                    "Wilcoxon test requires R but it couldn't be loaded (" + t.getMessage() + ')', t);
        }
        synchronized (RUtilities.R_SEMAPHORE) {
            rEngine.eval("x <- 0");
            rEngine.eval("y <- 0");
            // named xRef/yRef so that they do not shadow the group1/group2 fields
            long xRef = rEngine.rniPutDoubleArray(stats1.getValues());
            rEngine.rniAssign("x", xRef, 0);

            long yRef = rEngine.rniPutDoubleArray(stats2.getValues());
            rEngine.rniAssign("y", yRef, 0);

            rEngine.eval("result <- 0");

            rEngine.eval("result <- wilcox.test(as.numeric(t(x)),as.numeric(t(y)))");
            long e = rEngine.rniParse("result$p.value", 1);
            long r = rEngine.rniEval(e, 0);
            REXP x = new REXP(rEngine, r);

            values[0] = x.asDouble();
        }

        // NOTE(review): end() is called after every test - confirm that getREngine() can
        // hand out a usable engine again afterwards
        rEngine.end();
        setStatus(TaskStatus.FINISHED);
    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        setStatus(TaskStatus.ERROR);
    }

    values[1] = stats1.getMean();
    values[2] = stats2.getMean();
    return values;
}