Example usage for org.apache.commons.math.stat.descriptive.moment Mean getN

List of usage examples for org.apache.commons.math.stat.descriptive.moment Mean getN

Introduction

On this page you can find example usages of org.apache.commons.math.stat.descriptive.moment Mean getN.

Prototype

public long getN() 

Source Link

Usage

From source file:com.joliciel.jochre.graphics.RowOfShapesImpl.java

/**
 * Returns the regression line for this row, fitting it on the first call and
 * reusing the stored instance afterwards.
 * <p>
 * The fit uses one point per "average" shape: the horizontal centre of the shape
 * and its bottom edge. A shape counts as "average" when both its width and its
 * height lie between the 25th and 75th percentiles measured over all shapes on
 * the row. If fewer than 10 shapes qualify, or if the row is narrower than one
 * sixth of the container width, the fit is replaced by a flat line through the
 * mean bottom of the qualifying shapes (or of every shape when none qualify).
 * <p>
 * The resulting line gives the row's slope and a starting point for finding the
 * baseline and meanline.
 *
 * @return the regression stored for this row
 */
public SimpleRegression getRegression() {
    if (this.regression != null) {
        // already fitted by an earlier call
        return this.regression;
    }

    regression = new SimpleRegression();

    // Width and height distributions over every shape on the row.
    DescriptiveStatistics widthDistribution = new DescriptiveStatistics();
    DescriptiveStatistics heightDistribution = new DescriptiveStatistics();
    for (Shape candidate : this.getShapes()) {
        widthDistribution.addValue(candidate.getWidth());
        heightDistribution.addValue(candidate.getHeight());
    }

    // The inter-quartile band defines what an "average" shape is.
    double lowerWidth = widthDistribution.getPercentile(25);
    double upperWidth = widthDistribution.getPercentile(75);
    double lowerHeight = heightDistribution.getPercentile(25);
    double upperHeight = heightDistribution.getPercentile(75);

    final int requiredShapes = 10;
    int typicalShapes = 0;
    for (Shape candidate : this.getShapes()) {
        // skip anything outside the band (commas, etc.)
        if (!(candidate.getWidth() >= lowerWidth && candidate.getWidth() <= upperWidth
                && candidate.getHeight() >= lowerHeight && candidate.getHeight() <= upperHeight)) {
            continue;
        }

        // bottom edge only, since rows with different font sizes tend to align bottom
        double centreX = ((double) candidate.getLeft() + (double) candidate.getRight()) / 2.0;
        double bottomY = (double) candidate.getBottom();
        regression.addData(centreX, bottomY);
        typicalShapes++;
    }

    // Rows with very few shapes (generally letter or number + period) or very
    // narrow rows are not trusted to carry a slope.
    final boolean flatten;
    if (typicalShapes < requiredShapes) {
        LOG.debug("Too few shapes: " + typicalShapes + ", assuming straight horizontal line");
        flatten = true;
    } else if ((this.getRight() - this.getLeft()) < (this.getContainer().getWidth() / 6.0)) {
        LOG.debug("Too narrow: " + (this.getRight() - this.getLeft())
                + ", assuming straight horizontal line");
        flatten = true;
    } else {
        flatten = false;
    }

    if (flatten) {
        Mean bottomMean = new Mean();
        for (Shape candidate : this.getShapes()) {
            if (!(candidate.getWidth() >= lowerWidth && candidate.getWidth() <= upperWidth
                    && candidate.getHeight() >= lowerHeight && candidate.getHeight() <= upperHeight)) {
                continue;
            }
            bottomMean.increment((double) candidate.getBottom());
        }

        // nothing qualified as "average": fall back to every shape on the row
        if (bottomMean.getN() == 0) {
            for (Shape candidate : this.getShapes()) {
                bottomMean.increment((double) candidate.getBottom());
            }
        }

        // two points at the same height, one at each end of the row
        double flatY = bottomMean.getResult();
        regression = new SimpleRegression();
        regression.addData(this.getLeft(), flatY);
        regression.addData(this.getRight(), flatY);
    }

    LOG.debug("intercept: " + regression.getIntercept());
    LOG.debug("slope: " + regression.getSlope());
    LOG.debug("std err: " + regression.getSlopeStdErr());

    LOG.debug("x = 0, y = " + regression.predict(0));
    LOG.debug("x = " + this.getContainer().getWidth() + ", y = "
            + regression.predict(this.getContainer().getWidth()));

    return regression;
}

From source file:com.joliciel.jochre.graphics.SourceImageImpl.java

/**
 * Returns the mean slope of the rows in this image, computing it on the first
 * call and reusing the stored value afterwards.
 * <p>
 * Only rows whose regression holds more than two points contribute. A first
 * pass computes the mean and standard deviation of those slopes; a second pass
 * averages only the slopes lying within one standard deviation of that mean.
 * When no row contributes, the result is {@code 0.0}.
 *
 * @return the stored mean horizontal slope
 */
@Override
public double getMeanHorizontalSlope() {
    if (meanHorizontalSlopeCalculated) {
        return meanHorizontalSlope;
    }

    // First pass: statistics over every row with a genuinely fitted regression.
    Mean overallMean = new Mean();
    StandardDeviation overallStdDev = new StandardDeviation();
    List<SimpleRegression> fittedRows = new ArrayList<SimpleRegression>();
    for (RowOfShapes row : this.getRows()) {
        SimpleRegression rowFit = row.getRegression();
        // two points or fewer means the regression was not really calculated
        if (rowFit.getN() <= 2) {
            continue;
        }
        double rowSlope = rowFit.getSlope();
        overallMean.increment(rowSlope);
        overallStdDev.increment(rowSlope);
        fittedRows.add(rowFit);
    }

    boolean haveSlopes = overallMean.getN() > 0;
    double slopeMean = haveSlopes ? overallMean.getResult() : 0.0;
    double slopeStdDev = haveSlopes ? overallStdDev.getResult() : 0.0;
    LOG.debug("slopeMean: " + slopeMean);
    LOG.debug("slopeStdDev: " + slopeStdDev);

    if (fittedRows.isEmpty()) {
        meanHorizontalSlope = 0.0;
    } else {
        // Second pass: average only the slopes within one standard deviation.
        double lowerSlope = slopeMean - slopeStdDev;
        double upperSlope = slopeMean + slopeStdDev;
        Mean trimmedMean = new Mean();
        for (SimpleRegression rowFit : fittedRows) {
            double rowSlope = rowFit.getSlope();
            if (lowerSlope <= rowSlope && rowSlope <= upperSlope) {
                trimmedMean.increment(rowSlope);
            }
        }
        meanHorizontalSlope = trimmedMean.getResult();
    }

    LOG.debug("meanHorizontalSlope: " + meanHorizontalSlope);
    meanHorizontalSlopeCalculated = true;
    return meanHorizontalSlope;
}

From source file:com.joliciel.jochre.graphics.RowOfShapesImpl.java

/**
 * If there are different font-sizes in the current row,
 * calculate separate guidelines for the separate font-sizes.
 * Assumes groups have already been assigned.
 * <p>
 * Steps:
 * <ol>
 * <li>For each group, average {@code baseLine / xHeight} over the shapes whose
 * height exceeds 0.7 of the row x-height.</li>
 * <li>Walk the groups in order and mark "big areas": index ranges that start at
 * a rise of at least 0.15 in that average and end at a drop of at least 0.15.</li>
 * <li>Confirm a big area when the mean total brightness of its tall shapes is more
 * than 1.5 times the mean over tall shapes outside every big area.</li>
 * <li>If at least one area is confirmed and both resulting lists are non-empty,
 * assign guidelines separately to the "big" and "little" groups and update the
 * row's x-height and maximum x-height.</li>
 * </ol>
 * This method returns nothing; its effects are the calls to
 * {@code assignGuideLines}, {@code setXHeight} and {@code setXHeightMax}.
 */
@Override
public void splitByFontSize() {
    LOG.debug("splitByFontSize, " + this.toString());
    // One entry per group; stays 0 for a group that has no sufficiently tall shape.
    double[] meanAscenderToXHeightRatios = new double[this.getGroups().size()];
    int i = 0;
    double xHeight = this.getXHeight();
    // Shapes at or below this fraction of the x-height are ignored.
    double minHeightRatio = 0.7;
    for (GroupOfShapes group : this.getGroups()) {
        Mean meanAscenderToXHeightRatio = new Mean();
        for (Shape shape : group.getShapes()) {
            if (((double) shape.getHeight() / xHeight) > minHeightRatio) {
                double ascenderToXHeightRatio = ((double) shape.getBaseLine() / xHeight);
                LOG.trace("Shape " + shape.getIndex() + ": " + ascenderToXHeightRatio);
                meanAscenderToXHeightRatio.increment(ascenderToXHeightRatio);
            }
        }
        // Only store a result when at least one shape contributed.
        if (meanAscenderToXHeightRatio.getN() > 0) {
            meanAscenderToXHeightRatios[i] = meanAscenderToXHeightRatio.getResult();
            LOG.debug(group.toString() + ": " + meanAscenderToXHeightRatios[i]);
        }
        i++;
    }

    // Minimum change in ratio between consecutive measured groups that counts as a leap or drop.
    double threshold = 0.15;
    LOG.debug("threshold: " + threshold);

    // Ratio of the most recent group that had a non-zero ratio.
    double lastRatio = 0;

    // Each entry is { first group index, last group index } of a candidate big area.
    List<int[]> bigAreas = new ArrayList<int[]>();
    int bigAreaStart = 0;
    // -1: not yet known, 0: currently outside a big area, 1: currently inside one.
    int inBigArea = -1;
    for (i = 0; i < this.getGroups().size(); i++) {
        if (i > 0) {
            // Groups with a zero ratio (no tall shapes) are skipped for leap/drop detection.
            if (meanAscenderToXHeightRatios[i] != 0) {
                if ((inBigArea < 0 || inBigArea == 1)
                        && lastRatio - meanAscenderToXHeightRatios[i] >= threshold) {
                    // big drop: the big area ran from bigAreaStart up to the previous group
                    int[] bigArea = new int[] { bigAreaStart, i - 1 };
                    bigAreas.add(bigArea);
                    LOG.debug("Adding big area " + bigArea[0] + "," + bigArea[1]);
                    inBigArea = 0;
                } else if ((inBigArea < 0 || inBigArea == 0)
                        && meanAscenderToXHeightRatios[i] - lastRatio >= threshold) {
                    // big leap: a big area starts at this group
                    bigAreaStart = i;
                    inBigArea = 1;
                }
            }
        }

        if (meanAscenderToXHeightRatios[i] != 0)
            lastRatio = meanAscenderToXHeightRatios[i];
    }
    // A big area still open at the end of the row extends to the last group.
    if (inBigArea == 1) {
        int[] bigArea = new int[] { bigAreaStart, this.getGroups().size() - 1 };
        bigAreas.add(bigArea);
        LOG.debug("Adding big area " + bigArea[0] + "," + bigArea[1]);
    }

    // Now, which of these big areas are really big enough
    if (bigAreas.size() > 0) {
        // A big area is confirmed when its mean brightness exceeds this multiple of the small-area mean.
        double minBrightnessRatioForSplit = 1.5;
        // Mean total brightness of tall shapes in groups outside every big area.
        Mean brightnessMean = new Mean();
        // Mean total brightness of tall shapes, one accumulator per big area.
        Mean[] meanCardinalities = new Mean[bigAreas.size()];
        for (i = 0; i < bigAreas.size(); i++) {
            meanCardinalities[i] = new Mean();
        }
        i = 0;
        for (GroupOfShapes group : this.getGroups()) {
            // Find the big area containing group index i, or -1 if there is none.
            int bigAreaIndex = -1;
            int j = 0;
            for (int[] bigArea : bigAreas) {
                if (i >= bigArea[0] && i <= bigArea[1]) {
                    bigAreaIndex = j;
                    break;
                }
                j++;
            }
            for (Shape shape : group.getShapes()) {
                if (((double) shape.getHeight() / xHeight) > minHeightRatio) {
                    if (bigAreaIndex >= 0) {
                        meanCardinalities[bigAreaIndex].increment(shape.getTotalBrightness());
                    } else {
                        brightnessMean.increment(shape.getTotalBrightness());
                    }
                }
            }
            i++;
        } // next group

        boolean[] bigAreaConfirmed = new boolean[bigAreas.size()];
        boolean hasSplit = false;
        LOG.debug("brightnessMean for small areas: " + brightnessMean.getResult());
        for (i = 0; i < bigAreas.size(); i++) {
            int[] bigArea = bigAreas.get(i);
            // NOTE(review): if either Mean received no values its result is presumably NaN,
            // which makes the comparison below false and leaves the area unconfirmed - confirm.
            double ratio = meanCardinalities[i].getResult() / brightnessMean.getResult();
            LOG.debug("big area " + bigArea[0] + "," + bigArea[1]);
            LOG.debug("brightness mean: " + meanCardinalities[i].getResult());
            LOG.debug("brightness ratio: " + ratio);
            if (ratio > minBrightnessRatioForSplit) {
                // split found!
                LOG.debug("Confirmed!");
                bigAreaConfirmed[i] = true;
                hasSplit = true;
            }
        }

        List<GroupOfShapes> bigGroups = null;
        List<GroupOfShapes> littleGroups = null;

        if (hasSplit) {
            bigGroups = new ArrayList<GroupOfShapes>();
            littleGroups = new ArrayList<GroupOfShapes>();
            i = 0;
            // True when the previous group was a one-shape little group directly after a big group.
            boolean lastGroupSingleShapeLittle = false;
            boolean lastGroupBig = false;
            GroupOfShapes lastGroup = null;
            for (GroupOfShapes group : this.getGroups()) {
                boolean singleShapeLittleGroup = false;
                // Same lookup as above: which big area, if any, contains group index i.
                int bigAreaIndex = -1;
                int j = 0;
                for (int[] bigArea : bigAreas) {
                    if (i >= bigArea[0] && i <= bigArea[1]) {
                        bigAreaIndex = j;
                        break;
                    }
                    j++;
                }
                if (bigAreaIndex >= 0 && bigAreaConfirmed[bigAreaIndex]) {
                    if (lastGroupSingleShapeLittle) {
                        // Can't keep single shape little groups on their own:
                        // a one-shape little group sandwiched between two big groups becomes big
                        LOG.debug("Switching last group to big: " + lastGroup.toString());
                        littleGroups.remove(littleGroups.size() - 1);
                        bigGroups.add(lastGroup);
                    }
                    LOG.debug("Adding big group " + group.toString());
                    bigGroups.add(group);
                    lastGroupBig = true;
                } else {
                    // Groups outside every big area, or inside an unconfirmed one, are little.
                    LOG.debug("Adding little group " + group.toString());
                    littleGroups.add(group);

                    if (group.getShapes().size() == 1 && lastGroupBig) {
                        singleShapeLittleGroup = true;
                    }
                    lastGroupBig = false;
                }
                lastGroupSingleShapeLittle = singleShapeLittleGroup;
                lastGroup = group;
                i++;
            } // next group

            // A split only makes sense when both sides ended up with at least one group.
            hasSplit = bigGroups.size() > 0 && littleGroups.size() > 0;
        }

        if (hasSplit) {
            int xHeightBig = this.assignGuideLines(bigGroups);
            int xHeightLittle = this.assignGuideLines(littleGroups);

            // There may be a better way of determining which xHeight to use for the row
            // than simply based on number of groups, e.g. group width, etc.
            // On a tie the little x-height is used.
            if (bigGroups.size() > littleGroups.size()) {
                LOG.debug("Setting xHeight to " + xHeightBig);
                this.setXHeight(xHeightBig);
            } else {
                LOG.debug("Setting xHeight to " + xHeightLittle);
                this.setXHeight(xHeightLittle);
            }
            LOG.debug("Setting xHeightMax to " + xHeightBig);
            this.setXHeightMax(xHeightBig);
        } // has split
    } // split candidate

}

From source file:com.joliciel.jochre.graphics.RowOfShapesImpl.java

/**
 * Assign guidelines (base line and mean line) for a certain subset of shapes,
 * and return the x-height.
 * <p>
 * A histogram of black pixels per pixel row is built relative to a sloped
 * midline through the row. The mean line is searched for in the part of the
 * histogram above the midline and the base line in the part below it. Both are
 * then written to every shape in the subset, expressed relative to the shape's top.
 *
 * @param groupsToAssign the groups whose shapes receive guidelines;
 *        if {@code null}, all shapes of this row are used
 * @return the x-height, computed as base line index minus mean line index;
 *         0 when no base line is found, because the base line then defaults to the mean line
 */
int assignGuideLines(List<GroupOfShapes> groupsToAssign) {
    LOG.debug("assignGuideLines internal");
    double meanHorizontalSlope = this.getContainer().getMeanHorizontalSlope();

    // the base-line and mean-line will be at a fixed distance away from the midpoint
    // the question is, which distance!
    // To find this out, we count number of black pixels on each row above this line
    // And then start analysing from the top and the bottom until the number drops off sharply

    // The notion of "groupsToAssign" is used to only assign guidelines
    // to a subset of the groups on the line
    // when the line contains two different font sizes
    List<Shape> shapes = new ArrayList<Shape>();
    if (groupsToAssign != null) {
        for (GroupOfShapes group : groupsToAssign) {
            shapes.addAll(group.getShapes());
        }
    } else {
        shapes = this.getShapes();
    }

    int i = 0;
    DescriptiveStatistics shapeWidthStats = new DescriptiveStatistics();
    DescriptiveStatistics shapeHeightStats = new DescriptiveStatistics();

    // NOTE(review): the size statistics are taken over every shape on the row,
    // not only over the selected subset - confirm this is intended.
    for (Shape shape : this.getShapes()) {
        shapeWidthStats.addValue(shape.getWidth());
        shapeHeightStats.addValue(shape.getHeight());
    }

    // Size band for "average" shapes; note the lower height bound is the 45th percentile here.
    double minWidth = shapeWidthStats.getPercentile(25);
    double maxWidth = shapeWidthStats.getPercentile(75);
    double minHeight = shapeHeightStats.getPercentile(45);
    double maxHeight = shapeHeightStats.getPercentile(75);

    double rowMidPointX = (double) (this.getLeft() + this.getRight()) / 2.0;

    // calculating the Y midpoint by the shapes in the row, instead of by the top & bottom of row
    // (again over all shapes of the row, not only the subset)
    Mean rowMidPointYMean = new Mean();
    for (Shape shape : this.getShapes()) {
        // only add points whose shape is of "average" width and height (to leave out commas, etc.)
        if (shape.getWidth() >= minWidth && shape.getWidth() <= maxWidth && shape.getHeight() >= minHeight
                && shape.getHeight() <= maxHeight) {
            rowMidPointYMean.increment((double) (shape.getBottom() + shape.getTop()) / 2.0);
        }
    }

    // Fall back to the geometric middle of the row when no shape qualified as "average".
    double rowMidPointY = (double) (this.getTop() + this.getBottom()) / 2.0;
    if (rowMidPointYMean.getN() > 0)
        rowMidPointY = rowMidPointYMean.getResult();
    LOG.debug("rowMidPointX: " + rowMidPointX);
    LOG.debug("rowMidPointY: " + rowMidPointY);

    // figure out where the top-most shape starts and the bottom-most shape ends, relative to the y midline
    int minTop = Integer.MAX_VALUE;
    int maxBottom = Integer.MIN_VALUE;
    // Midline Y for each shape, in the same order as "shapes", reused by the loops below.
    List<Integer> rowYMidPoints = new ArrayList<Integer>(shapes.size());
    for (Shape shape : shapes) {
        double shapeMidPointX = (double) (shape.getLeft() + shape.getRight()) / 2.0;
        // Midline at this shape's x: the row midpoint shifted along the mean horizontal slope.
        int shapeMidPointY = (int) Math
                .round(rowMidPointY + (meanHorizontalSlope * (shapeMidPointX - rowMidPointX)));
        rowYMidPoints.add(shapeMidPointY);

        int relativeTop = shape.getTop() - shapeMidPointY;
        int relativeBottom = shape.getBottom() - shapeMidPointY;

        if (relativeTop < minTop)
            minTop = relativeTop;
        if (relativeBottom > maxBottom)
            maxBottom = relativeBottom;
    }
    // Clamp so the interval always contains the midline itself (also covers an empty shape list).
    if (minTop > 0)
        minTop = 0;
    if (maxBottom < 0)
        maxBottom = 0;

    // Histogram layout: indexes 0..yIntervalTop-1 lie above the midline,
    // index yIntervalTop is the midline, and the remaining indexes lie below it.
    int yIntervalTop = 0 - minTop;
    int yIntervalBottom = maxBottom;
    int yInterval = yIntervalTop + 1 + yIntervalBottom;
    LOG.debug("yIntervalTop: " + yIntervalTop);
    LOG.debug("yIntervalBottom: " + yIntervalBottom);
    LOG.debug("yInterval: " + yInterval);
    int[] pixelCounts = new int[yInterval];

    // Get the pixel count for each row
    // examining one shape at a time to limit ourselves to the pixels that are
    // actually considered to be in this row
    int blackThreshold = this.getContainer().getSeparationThreshold();
    int shapeIndex = 0;
    int shapeCount = 0;
    for (Shape shape : shapes) {
        // Shapes shorter than minHeight do not contribute pixels to the histogram.
        if (shape.getHeight() >= minHeight) {
            LOG.trace(shape.toString());
            shapeCount++;
            int shapeMidPointY = rowYMidPoints.get(shapeIndex);
            // Absolute Y that maps to histogram index 0 for this shape.
            int zeroLine = shapeMidPointY - yIntervalTop;
            int topIndex = shape.getTop() - zeroLine;
            for (int x = 0; x < shape.getWidth(); x++) {
                for (int y = 0; y < shape.getHeight(); y++) {
                    int yIndex = topIndex + y;
                    if (yIndex >= 0 && yIndex < pixelCounts.length
                            && shape.isPixelBlack(x, y, blackThreshold)) {
                        pixelCounts[yIndex]++;
                    }
                }
            }
        }
        // Advanced for every shape, counted or not, to stay aligned with rowYMidPoints.
        shapeIndex++;
    }
    LOG.debug("Got pixels from " + shapeCount + " shapes.");

    // With fewer than three contributing shapes the thresholds below are halved.
    boolean notEnoughShapes = shapeCount < 3;
    LOG.debug("notEnoughShapes? " + notEnoughShapes);

    // We start at the top
    // A histogram row whose count exceeds 1.1 times the mean of the top part is a mean-line candidate
    Mean pixelCountMeanTop = new Mean();
    StandardDeviation pixelCountStdDevTop = new StandardDeviation();
    for (i = 0; i <= yIntervalTop; i++) {
        pixelCountMeanTop.increment(pixelCounts[i]);
        pixelCountStdDevTop.increment(pixelCounts[i]);
    }
    // The standard deviation is only logged; it does not influence the thresholds.
    LOG.debug("Top: pixel count mean: " + pixelCountMeanTop.getResult() + ", std dev: "
            + pixelCountStdDevTop.getResult());

    double threshold = pixelCountMeanTop.getResult() * 1.1;
    if (notEnoughShapes) {
        threshold = threshold / 2.0;
    }
    double lowerThreshold = threshold / 2.0;

    LOG.debug("Top threshold: " + threshold);
    LOG.debug("Top lowerThreshold: " + lowerThreshold);

    int meanLine = 0;
    boolean findMeanLine = true;
    // Scan from the top down to the midline. The search is re-armed whenever the count
    // falls below lowerThreshold, so meanLine ends at the start of the last run above threshold.
    for (i = 0; i <= yIntervalTop; i++) {
        int pixelCount = pixelCounts[i];
        if (findMeanLine && pixelCount > threshold) {
            meanLine = i;
            findMeanLine = false;
        } else if (!findMeanLine && pixelCount < lowerThreshold) {
            findMeanLine = true;
        }
    }

    // We start at the bottom
    // A histogram row whose count exceeds 1.1 times the mean of the bottom part is a base-line candidate

    Mean pixelCountMeanBottom = new Mean();
    StandardDeviation pixelCountStdDevBottom = new StandardDeviation();
    for (i = pixelCounts.length - 1; i >= yIntervalTop; i--) {
        pixelCountMeanBottom.increment(pixelCounts[i]);
        pixelCountStdDevBottom.increment(pixelCounts[i]);
    }
    LOG.debug("Bottom: pixel count mean: " + pixelCountMeanBottom.getResult() + ", std dev: "
            + pixelCountStdDevBottom.getResult());

    threshold = pixelCountMeanBottom.getResult() * 1.1;
    if (notEnoughShapes) {
        threshold = threshold / 2.0;
    }
    lowerThreshold = threshold / 2.0;

    LOG.debug("Bottom threshold: " + threshold);
    LOG.debug("Bottom lowerThreshold: " + lowerThreshold);
    // Defaults to the mean line, which yields an x-height of 0 when no base line is found.
    int baseLine = meanLine;
    boolean findBaseLine = true;
    // Mirror of the mean-line scan, moving from the bottom up to the midline.
    for (i = pixelCounts.length - 1; i >= yIntervalTop; i--) {
        int pixelCount = pixelCounts[i];
        if (findBaseLine && pixelCount > threshold) {
            baseLine = i;
            findBaseLine = false;
        } else if (!findBaseLine && pixelCount < lowerThreshold) {
            findBaseLine = true;
        }
    }

    // Trace output of the whole histogram with the two chosen lines marked.
    for (i = 0; i < yInterval; i++) {
        int pixelCount = pixelCounts[i];
        if (i == meanLine)
            LOG.trace("======= MEAN LINE " + i + " ==========");
        LOG.trace("pixel row " + i + ". pixel count " + pixelCount);
        if (i == baseLine)
            LOG.trace("======= BASE LINE " + i + " ==========");
    }

    // assign base lines and mean lines to each shape
    shapeIndex = 0;
    for (Shape shape : shapes) {
        int shapeMidPointY = rowYMidPoints.get(shapeIndex);
        // Convert histogram indexes back to absolute Y at this shape's position.
        int yMeanline = (shapeMidPointY - yIntervalTop) + meanLine;
        int yBaseline = (shapeMidPointY - yIntervalTop) + baseLine;
        LOG.trace(shape.toString() + ", meanLine: " + (yMeanline - shape.getTop()) + ", baseLine: "
                + (yBaseline - shape.getTop()));
        // Stored relative to the top of the shape.
        shape.setBaseLine(yBaseline - shape.getTop());
        shape.setMeanLine(yMeanline - shape.getTop());
        shapeIndex++;
    } // next shape

    int xHeight = baseLine - meanLine;
    return xHeight;
}