Example usage for org.apache.commons.math.stat.descriptive.moment Mean getResult

Introduction

This page shows example usages of the getResult() method of org.apache.commons.math.stat.descriptive.moment.Mean.

Prototype

@Override
public double getResult()

Source Link

Usage

From source file:com.joliciel.jochre.graphics.SegmenterImpl.java

/**
 * Get a random sample (with replacement) of shapes on this image.
 * @param sourceImage/*from  ww  w . java 2  s .c  o m*/
 * @param sampleSize
 * @return
 */
List<Shape> getSample(Collection<RowOfShapes> rows, int sampleSize, boolean bigShapesOnly) {
    double minShapeWidth = 0;
    double minShapeHeight = 0;
    double maxShapeWidth = Double.MAX_VALUE;
    double maxShapeHeight = Double.MAX_VALUE;
    if (bigShapesOnly) {
        Mean widthMean = new Mean();
        Mean heightMean = new Mean();
        for (RowOfShapes row : rows) {
            for (Shape shape : row.getShapes()) {
                widthMean.increment(shape.getWidth());
                heightMean.increment(shape.getHeight());
            }
        }
        minShapeWidth = widthMean.getResult();
        minShapeHeight = heightMean.getResult();
        maxShapeWidth = minShapeWidth * 2.5;
        maxShapeHeight = minShapeHeight * 2.5;
    }
    List<Shape> sample = new ArrayList<Shape>(sampleSize);
    int countBad = 0;
    while (sample.size() < sampleSize) {
        if (countBad >= 10) {
            minShapeWidth = 0;
            minShapeHeight = 0;
            maxShapeWidth = Double.MAX_VALUE;
            maxShapeHeight = Double.MAX_VALUE;
        }
        double random = Math.random();
        int rowIndex = (int) Math.floor(random * rows.size());
        Iterator<RowOfShapes> iRows = rows.iterator();
        RowOfShapes row = null;
        for (int i = 0; i <= rowIndex; i++) {
            row = iRows.next();
        }
        random = Math.random();

        int index = (int) Math.floor(random * row.getShapes().size());
        Shape shape = row.getShapes().get(index);
        if (shape.getWidth() > minShapeWidth && shape.getHeight() > minShapeHeight
                && shape.getWidth() < maxShapeWidth && shape.getHeight() < maxShapeHeight) {
            sample.add(shape);
            countBad = 0;
        } else {
            countBad++;
        }
    }
    return sample;
}

From source file:com.joliciel.jochre.graphics.SegmenterImpl.java

/**
 * Computes a fill factor for the whole image as the rounded mean of the fill factors
 * returned by the shape filler for a random sample of 40 larger-than-average shapes.
 *
 * @param sourceImage the image whose rows are sampled
 * @return the mean fill factor of the sample, rounded to the nearest integer
 */
private int getFillFactor(SourceImage sourceImage) {
    LOG.debug("########## getFillFactor #########");
    final List<Shape> sampledShapes = this.getSample(sourceImage.getRows(), 40, true);
    final Mean fillFactorMean = new Mean();
    final ShapeFiller filler = this.graphicsService.getShapeFiller();
    for (final Shape sampledShape : sampledShapes) {
        LOG.debug("Shape: " + sampledShape);
        final int shapeFillFactor = filler.getFillFactor(sampledShape, sourceImage.getBlackThreshold());
        LOG.debug("fillFactor: " + shapeFillFactor);
        fillFactorMean.increment(shapeFillFactor);
    }

    // the mean is read once and reused for both the log line and the rounding
    final double meanFillFactor = fillFactorMean.getResult();
    LOG.debug("meanFillFactor: " + meanFillFactor);
    final int imageFillFactor = (int) Math.round(meanFillFactor);
    LOG.debug("imageFillFactor: " + imageFillFactor);
    return imageFillFactor;
}

From source file:com.joliciel.jochre.graphics.SegmenterImpl.java

/**
 * Removes oversized shapes from <code>shapes</code>, together with the shapes they enclose
 * and the fragments lying next to horizontal rules.
 * <p>
 * Steps:
 * <ol>
 * <li>Compute the mean height/width of all shapes, then recompute both means over the shapes
 * whose height and width lie strictly between the first mean and twice that mean.</li>
 * <li>Mark as "large" any shape that is very tall (height &gt; 6 x mean), very wide
 * (width &gt; 6 x mean; those not taller than 1.5 x mean height are also recorded as
 * horizontal rules), or tall and narrow (height &gt; 2.5 x mean, width &lt;= mean / 2).</li>
 * <li>Decide for each large shape whether it is a frame/grid or an illustration; illustrations
 * are added to the source image's large shapes, and the shapes they enclose (with a 5 pixel
 * tolerance) are removed.</li>
 * <li>Remove shapes practically touching a horizontal rule (3 pixel tolerance), repeating up
 * to 3 times against the segments found in the previous pass.</li>
 * </ol>
 *
 * @param shapes the shapes to filter; modified in place
 */
void removeOversizedShapes(List<Shape> shapes) {
    LOG.debug("########## removeOversizedShapes #########");
    Mean shapeHeightMean = new Mean();
    Mean shapeWidthMean = new Mean();

    for (Shape shape : shapes) {
        shapeHeightMean.increment(shape.getHeight());
        shapeWidthMean.increment(shape.getWidth());
    }

    double heightMean = shapeHeightMean.getResult();
    double widthMean = shapeWidthMean.getResult();
    LOG.debug("heightMean: " + heightMean);
    LOG.debug("widthMean: " + widthMean);

    // second pass: average only the shapes between the mean and twice the mean
    shapeHeightMean = new Mean();
    shapeWidthMean = new Mean();
    for (Shape shape : shapes) {
        if (shape.getHeight() > heightMean && shape.getHeight() < (heightMean * 2.0)
                && shape.getWidth() > widthMean && shape.getWidth() < (widthMean * 2.0)) {
            shapeHeightMean.increment(shape.getHeight());
            shapeWidthMean.increment(shape.getWidth());
        }
    }

    heightMean = shapeHeightMean.getResult();
    widthMean = shapeWidthMean.getResult();
    LOG.debug("average shape heightMean: " + heightMean);
    LOG.debug("average shape widthMean: " + widthMean);

    double minHeightBigShape = heightMean * 6;
    double minWidthWideShape = widthMean * 6;
    double minHeightWideShape = heightMean * 1.5;
    double minHeightTallShape = heightMean * 2.5;
    double maxWidthTallShape = widthMean / 2;
    LOG.debug("minHeightBigShape: " + minHeightBigShape);
    LOG.debug("minWidthWideShape: " + minWidthWideShape);
    LOG.debug("minHeightWideShape: " + minHeightWideShape);
    LOG.debug("minHeightTallShape: " + minHeightTallShape);
    LOG.debug("maxWidthTallShape: " + maxWidthTallShape);

    List<Shape> largeShapes = new ArrayList<Shape>();
    List<Shape> horizontalRules = new ArrayList<Shape>();
    for (Shape shape : shapes) {
        if (shape.getHeight() > minHeightBigShape) {
            LOG.debug("Removing " + shape + " (height)");
            largeShapes.add(shape);
        } else if (shape.getWidth() > minWidthWideShape && shape.getHeight() > minHeightWideShape) {
            // wide and reasonably high: not a horizontal bar
            LOG.debug("Removing " + shape + " (width)");
            largeShapes.add(shape);
        } else if (shape.getWidth() > minWidthWideShape) {
            // wide but low: a horizontal rule, removed as well and remembered for the last step
            LOG.debug("Removing " + shape + " (horizontal rule)");
            largeShapes.add(shape);
            horizontalRules.add(shape);
        } else if (shape.getWidth() <= maxWidthTallShape && shape.getHeight() > minHeightTallShape) {
            LOG.debug("Removing " + shape + " (narrow)");
            largeShapes.add(shape);
        }
    }

    // Only want to remove enclosed shapes if the large shape isn't a frame/grid
    // A) first reduce the shape by 5 percent and see if its cardinality reduces vastly
    // (in which case it's a frame) - if so, don't remove enclosed shapes
    // B) next, detect white rectangles within the shape - if they're big enough,
    // don't remove enclosed shapes
    LOG.debug("Are large shapes frames or illustrations?");
    double maxFrameCardinalityRatio = 0.5;
    double minFrameWhiteAreaSizeRatio = 0.9;
    List<Shape> illustrations = new ArrayList<Shape>(largeShapes);
    for (Shape largeShape : largeShapes) {
        LOG.debug(largeShape.toString());
        int xOrigin = largeShape.getStartingPoint()[0] - largeShape.getLeft();
        int yOrigin = largeShape.getStartingPoint()[1] - largeShape.getTop();
        Shape dummyShape = graphicsService.getDot(sourceImage, xOrigin, yOrigin);
        // We want to fill up a mirror of the contiguous pixels within this shape,
        // which is what we'll use for further analysis to know
        // if it's a frame or not.
        WritableImageGrid mirror = graphicsService.getEmptyMirror(largeShape);
        this.findContiguousPixels(largeShape, mirror, dummyShape, xOrigin, yOrigin,
                sourceImage.getSeparationThreshold());

        // inner window: the mirror with a 5 percent margin cut off on every side
        int adjustedLeft = (int) Math.round((double) mirror.getWidth() * 0.05);
        int adjustedRight = (int) Math.round((double) mirror.getWidth() * 0.95);
        int adjustedTop = (int) Math.round((double) mirror.getHeight() * 0.05);
        int adjustedBottom = (int) Math.round((double) mirror.getHeight() * 0.95);

        int cardinality = 0;
        int innerCardinality = 0;
        for (int x = 0; x < mirror.getWidth(); x++) {
            for (int y = 0; y < mirror.getHeight(); y++) {
                if (mirror.getPixel(x, y) > 0) {
                    cardinality++;
                    if (x >= adjustedLeft && x <= adjustedRight && y >= adjustedTop && y <= adjustedBottom)
                        innerCardinality++;
                }
            }
        }

        LOG.debug("cardinality: " + cardinality);
        LOG.debug("innerCardinality: " + innerCardinality);
        // NaN when the mirror has no set pixels; the comparison below is then false
        double ratio = (double) innerCardinality / (double) cardinality;
        LOG.debug("ratio: " + ratio);
        if (ratio <= maxFrameCardinalityRatio) {
            LOG.debug("maxFrameCardinalityRatio: " + maxFrameCardinalityRatio);
            LOG.debug("Frame by cardinality! Removing from illustrations");
            illustrations.remove(largeShape);
        } else {
            // Now, it could still be a grid
            // to find this out we need to detect white areas inside the shape.
            WhiteAreaFinder whiteAreaFinder = new WhiteAreaFinder();
            double minWhiteAreaWidth = widthMean * 10;
            double minWhiteAreaHeight = heightMean * 4;
            List<Rectangle> whiteAreas = whiteAreaFinder.getWhiteAreas(mirror, 0, 0, 0, mirror.getWidth() - 1,
                    mirror.getHeight() - 1, minWhiteAreaWidth, minWhiteAreaHeight);
            int whiteAreaSize = 0;
            for (Rectangle whiteArea : whiteAreas) {
                whiteAreaSize += (whiteArea.getWidth() * whiteArea.getHeight());
            }

            int totalSize = mirror.getWidth() * mirror.getHeight();
            LOG.debug("whiteAreaSize: " + whiteAreaSize);
            LOG.debug("totalSize: " + totalSize);

            double sizeRatio = (double) whiteAreaSize / (double) totalSize;
            LOG.debug("sizeRatio: " + sizeRatio);

            if (sizeRatio >= minFrameWhiteAreaSizeRatio) {
                LOG.debug("minFrameWhiteAreaSizeRatio: " + minFrameWhiteAreaSizeRatio);
                LOG.debug("Frame by white area size! Removing from illustrations");
                illustrations.remove(largeShape);
            }

        }
    }

    for (Shape largeShape : illustrations) {
        // Add this to large shapes if it's not a "frame"
        // large shapes are used for paragraph detection
        sourceImage.getLargeShapes().add(largeShape);
    }

    // remove shapes that are enclosed inside illustrations
    List<Shape> enclosedShapesToDelete = new ArrayList<Shape>();
    int extension = 5;
    for (Shape shape : shapes) {
        for (Shape shapeToDelete : illustrations) {
            if (shape.getLeft() >= shapeToDelete.getLeft() - extension
                    && shape.getRight() <= shapeToDelete.getRight() + extension
                    && shape.getTop() >= shapeToDelete.getTop() - extension
                    && shape.getBottom() <= shapeToDelete.getBottom() + extension) {
                LOG.debug("Enclosed shape: " + shape);
                LOG.debug(" enclosed by " + shapeToDelete);
                enclosedShapesToDelete.add(shape);
            }
        }
    }

    shapes.removeAll(largeShapes);
    shapes.removeAll(enclosedShapesToDelete);

    // remove shapes that are practically touching horizontal rules (probably segments of the rule that got split)
    extension = 3;
    List<Shape> listToTestAgainst = horizontalRules;
    for (int i = 0; i < 3; i++) {
        List<Shape> horizontalRuleSegments = new ArrayList<Shape>();
        for (Shape horizontalRule : listToTestAgainst) {
            for (Shape shape : shapes) {
                // Both horizontal bounds must hold for the shape to overlap the rule
                // (within the tolerance). Joining them with "or" is always true and would
                // match every shape in the rule's vertical band, however far away.
                if (shape.getLeft() <= horizontalRule.getRight() + extension
                        && shape.getRight() >= horizontalRule.getLeft() - extension
                        && shape.getTop() >= horizontalRule.getTop() - extension
                        && shape.getBottom() <= horizontalRule.getBottom() + extension) {
                    LOG.debug("Horizontal rule segment: " + shape);
                    LOG.debug(" touching " + horizontalRule);
                    horizontalRuleSegments.add(shape);
                }
            }
        }
        shapes.removeAll(horizontalRuleSegments);
        // next pass: look for shapes touching the segments just removed
        listToTestAgainst = horizontalRuleSegments;
        if (listToTestAgainst.size() == 0)
            break;
    }

}

From source file:com.joliciel.jochre.graphics.SegmenterImpl.java

/**
 * Attempts to split shapes that are wider than average (probably several letters joined by
 * a thin bridge) into separate shapes.
 * <p>
 * The work is done per row cluster, since the parameters depend on the font size:
 * <ol>
 * <li>minShapeWidth: the minimum shape width to consider for a split</li>
 * <li>maxBridgeWidth: the maximum bridge width to use as a dividing bridge between two shapes</li>
 * <li>minLetterWeight: the minimum pixel count that can represent a separate letter</li>
 * <li>maxHorizontalOverlap: the maximum horizontal overlap between the left-hand and
 * right-hand shape</li>
 * </ol>
 * Shapes for which a split is found are replaced in their row by the resulting shapes, and
 * the affected rows are reordered.
 *
 * @param sourceImage the image whose row clusters are analysed
 * @param fillFactor the image fill factor; each unit adds 0.15 to the width multipliers used
 *        to select split candidates
 */
void splitShapes(SourceImage sourceImage, int fillFactor) {
    LOG.debug("########## splitShapes #########");

    double imageShapeMean = sourceImage.getAverageShapeWidth();
    double maxWidthForSplit = imageShapeMean * 6.0; // avoid splitting horizontal rules!

    Set<Set<RowOfShapes>> rowClusters = sourceImage.getRowClusters();
    for (Set<RowOfShapes> rowCluster : rowClusters) {
        LOG.debug("Analysing row cluster");
        // 1) minShapeWidth: calculate the minimum shape width to be considered for splitting

        // first get the mean
        Mean meanWidth = new Mean();
        for (RowOfShapes row : rowCluster) {
            for (Shape shape : row.getShapes()) {
                meanWidth.increment(shape.getWidth());
            }
        }
        double shapeWidthMean = meanWidth.getResult();
        LOG.debug("Mean width: " + shapeWidthMean);

        // Note: there is much trial and error for these numbers
        // but the general guideline is that it is easier to deal downstream
        // with bad joins than with bad splits
        // so we prefer to err on the upper side
        double fillFactorScale = 0.15 * fillFactor;
        double widthForSplittingLower = shapeWidthMean * (1.6 + fillFactorScale);
        double widthForSplittingUpper = shapeWidthMean * (2.2 + fillFactorScale);

        LOG.debug("widthForSplittingLower: " + widthForSplittingLower);
        LOG.debug("widthForSplittingUpper: " + widthForSplittingUpper);
        LOG.debug("maxWidthForSplit: " + maxWidthForSplit);
        List<Shape> candidates = new ArrayList<Shape>();
        for (RowOfShapes row : rowCluster) {
            LOG.debug("Next row " + row.getIndex());
            for (Shape shape : row.getShapes()) {
                LOG.trace("Shape width " + shape.getWidth());
                if (shape.getWidth() > widthForSplittingLower && shape.getWidth() < maxWidthForSplit) {
                    candidates.add(shape);
                    LOG.debug("Found candidate with width " + shape.getWidth() + ": " + shape);
                }
            }
        }

        if (candidates.size() > 0) {
            // we'll take a random sampling of shapes for the next parameters
            int sampleSize = 30;
            List<Shape> sample = this.getSample(rowCluster, sampleSize, true);

            Mean meanPixelCount = new Mean();
            Vectorizer vectorizer = this.graphicsService.getVectorizer();
            List<Integer> thicknesses = new ArrayList<Integer>();
            for (Shape shape : sample) {
                BitSet bitset = shape.getBlackAndWhiteBitSet(sourceImage.getSeparationThreshold(), 0);
                meanPixelCount.increment(bitset.cardinality());
                List<LineSegment> vectors = vectorizer.vectorize(shape);

                int height = shape.getHeight();
                // Floating-point division so that the ceiling really rounds up: with integer
                // division the step would be 0 for any shape less than 8 pixels high.
                int sampleStep = Math.max(1, (int) Math.ceil(height / 8.0));

                for (LineSegment vector : vectors) {
                    List<Integer> vectorThickness = vector.getLineDefinition().findArrayListThickness(shape,
                            vector.getStartX(), vector.getStartY(), vector.getLength(),
                            sourceImage.getSeparationThreshold(), 0, sampleStep);
                    thicknesses.addAll(vectorThickness);
                }

            }

            double pixelCountMean = meanPixelCount.getResult();

            Mean meanThickness = new Mean();
            for (int thickness : thicknesses) {
                meanThickness.increment(thickness);
            }
            double thicknessMean = meanThickness.getResult();

            // keep only the below-average thicknesses and average those
            meanThickness = new Mean();
            for (int thickness : thicknesses) {
                if (thickness < thicknessMean)
                    meanThickness.increment(thickness);
            }

            thicknessMean = meanThickness.getResult();
            LOG.debug("thicknessMean: " + thicknessMean);

            // 2) maxBridgeWidth: the maximum bridge width to use as a dividing bridge between two shapes when splitting
            double maxBridgeWidthLower = thicknessMean * 0.5;
            double maxBridgeWidthUpper = thicknessMean * 0.8;
            LOG.debug("maxBridgeWidthLower: " + maxBridgeWidthLower);
            LOG.debug("maxBridgeWidthUpper: " + maxBridgeWidthUpper);

            // 3) minLetterWeight: the minimum pixel count that can represent a separate letter when splitting
            int minLetterWeight = (int) Math.floor(pixelCountMean / 4.0);
            LOG.debug("minLetterWeight: " + minLetterWeight);

            // 4) maxHorizontalOverlap: the maximum horizontal overlap between the left-hand and right-hand shape
            int maxOverlap = (int) Math.ceil(shapeWidthMean / 8.0);
            LOG.debug("maxOverlap: " + maxOverlap);

            Map<Shape, List<Shape>> shapesToSplit = new Hashtable<Shape, List<Shape>>();
            for (Shape candidate : candidates) {
                LOG.debug("Trying to split candidate " + candidate);
                // Text rendering of the candidate, one line per pixel row
                // (M = mean line, B = base line, x = black pixel, o = white pixel).
                // Only built when it will actually be logged.
                if (LOG.isDebugEnabled()) {
                    for (int y = 0; y < candidate.getHeight(); y++) {
                        StringBuilder line = new StringBuilder();
                        if (y == candidate.getMeanLine())
                            line.append("M");
                        else if (y == candidate.getBaseLine())
                            line.append("B");
                        else
                            line.append(y);
                        for (int x = 0; x < candidate.getWidth(); x++) {
                            if (candidate.isPixelBlack(x, y, sourceImage.getBlackThreshold()))
                                line.append("x");
                            else
                                line.append("o");
                        }
                        LOG.debug(line.toString());
                    }
                }
                if (candidate.getHeight() < 3.0 * maxBridgeWidthUpper) {
                    LOG.debug("Shape too narrow - probably a long dash.");
                    continue;
                }
                int maxBridgeWidth;
                if (candidate.getWidth() > widthForSplittingUpper)
                    maxBridgeWidth = (int) Math.ceil(maxBridgeWidthUpper);
                else {
                    // since many bridges are thicker than expected
                    // add a rule that the thicker the bridge is, the wider the image needs to be
                    // (linear interpolation between the lower and upper bridge widths)
                    maxBridgeWidth = (int) Math.ceil(
                            maxBridgeWidthLower + (((double) candidate.getWidth() - widthForSplittingLower)
                                    / (widthForSplittingUpper - widthForSplittingLower)
                                    * (maxBridgeWidthUpper - maxBridgeWidthLower)));
                }
                List<Shape> splitShapes = this.splitShape(candidate, sourceImage, maxBridgeWidth,
                        minLetterWeight, maxOverlap);
                if (splitShapes.size() > 1) {
                    LOG.debug("Split found");
                    for (Shape splitShape : splitShapes) {
                        splitShape.setRow(candidate.getRow());
                    }
                    shapesToSplit.put(candidate, splitShapes);
                }
            }

            LOG.debug("Replacing shapes with split shapes");
            List<RowOfShapes> rowsToReorder = new ArrayList<RowOfShapes>();
            for (Map.Entry<Shape, List<Shape>> split : shapesToSplit.entrySet()) {
                RowOfShapes row = split.getKey().getRow();
                row.removeShape(split.getKey());
                row.addShapes(split.getValue());
                rowsToReorder.add(row);
            }

            for (RowOfShapes row : rowsToReorder)
                row.reorderShapes();
        }
    }
    LOG.debug("splitShapes complete");
}

From source file:com.joliciel.jochre.graphics.SegmenterImpl.java

/**
 * Split rows if they're particularly high, and contain considerable white space in the middle.
 * Shapes causing the join will be removed if too high, or attached to the closest row otherwise.
 * @param sourceImage/*from w  ww . j  a v  a  2s  .  c  o m*/
 * @param regressions
 * @return
 */
void splitRows(SourceImage sourceImage) {
    LOG.debug("########## splitRows #########");

    // Calculate the min row height to be considered for splitting
    double minHeightForSplit = sourceImage.getAverageShapeHeight();
    LOG.debug("minHeightForSplit: " + minHeightForSplit);

    double slopeMean = sourceImage.getMeanHorizontalSlope();

    List<RowOfShapes> candidateRows = new ArrayList<RowOfShapes>();
    for (RowOfShapes row : sourceImage.getRows()) {
        if (row.getRight() == row.getLeft())
            continue;
        int height = row.getBottom() - row.getTop();
        if (height >= minHeightForSplit) {
            LOG.debug("Adding candidate " + row.toString());
            candidateRows.add(row);
        }
    }

    // For each row to be considered for splitting, see if there are lines of white space inside it.
    Hashtable<RowOfShapes, List<RowOfShapes>> splitRows = new Hashtable<RowOfShapes, List<RowOfShapes>>();
    for (RowOfShapes row : candidateRows) {
        SimpleRegression regression = new SimpleRegression();
        // y = intercept + slope * x 
        LOG.debug("Left point: (" + row.getLeft() + " , " + row.getTop() + ")");
        regression.addData(row.getLeft(), row.getTop());
        double rightHandY = row.getTop() + ((double) (row.getRight() - row.getLeft()) * slopeMean);
        LOG.debug("Right point: (" + row.getRight() + " , " + rightHandY + ")");
        regression.addData(row.getRight(), rightHandY);

        int yDelta = (int) Math.ceil(Math.abs(rightHandY - (double) row.getTop()));
        int yInterval = yDelta + (row.getBottom() - row.getTop() + 1) + yDelta;

        LOG.debug("yDelta: " + yDelta);
        LOG.debug("yInterval: " + yInterval);
        // let's get pixel counts shape by shape, and leave out the rest (in case rows overlap vertically)
        int[] pixelCounts = new int[yInterval];
        for (Shape shape : row.getShapes()) {
            LOG.trace("Shape " + shape);
            int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft()));
            LOG.trace("yDeltaAtLeft: " + yDeltaAtLeft);
            // the shape offset + the offset between the regression line and the row top
            // + the delta we left at the start in case the line slopes upwards to the right
            int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta;
            LOG.trace("topIndex: (" + shape.getTop() + " - " + row.getTop() + ") + (" + row.getTop() + " - "
                    + yDeltaAtLeft + ") + " + yDelta + " = " + topIndex);
            for (int x = 0; x < shape.getWidth(); x++) {
                for (int y = 0; y < shape.getHeight(); y++) {
                    if (shape.isPixelBlack(x, y, sourceImage.getBlackThreshold())) {
                        pixelCounts[topIndex + y]++;
                    }
                }
            }
        }

        Mean pixelCountMean = new Mean();
        StandardDeviation pixelCountStdDev = new StandardDeviation();
        for (int i = 0; i < yInterval; i++) {
            LOG.debug("Pixel count " + i + ": " + pixelCounts[i]);
            pixelCountMean.increment(pixelCounts[i]);
            pixelCountStdDev.increment(pixelCounts[i]);
        }
        LOG.debug("pixel count mean: " + pixelCountMean.getResult() + ", std dev: "
                + pixelCountStdDev.getResult());

        // If there's a split required, we're going to go considerably above and below the mean several times
        double lowThreshold = pixelCountMean.getResult() / 2.0;
        double highThreshold = pixelCountMean.getResult() * 2.0;
        boolean inRow = false;
        List<Integer> switches = new ArrayList<Integer>();
        for (int i = 0; i < yInterval; i++) {
            if (!inRow && pixelCounts[i] > highThreshold) {
                LOG.debug("In row at " + i + ", pixel count " + pixelCounts[i]);
                inRow = true;
                switches.add(i);
            } else if (inRow && pixelCounts[i] < lowThreshold) {
                LOG.debug("Out of row at " + i + ", pixel count " + pixelCounts[i]);
                inRow = false;
                switches.add(i);
            }
        }
        if (switches.size() > 2) {
            // we have more than one row
            List<Integer> rowSeparations = new ArrayList<Integer>();

            // find the row separators
            for (int switchIndex = 1; switchIndex < switches.size() - 2; switchIndex = switchIndex + 2) {
                int outOfRow = switches.get(switchIndex);
                int intoRow = switches.get(switchIndex + 1);
                int minPixelCount = (int) Math.ceil(highThreshold);
                int minIndex = -1;
                // find the row with the lowest pixel count
                for (int i = outOfRow; i <= intoRow; i++) {
                    if (pixelCounts[i] < minPixelCount) {
                        minPixelCount = pixelCounts[i];
                        minIndex = i;
                    }
                }
                rowSeparations.add(minIndex);
            }

            // separate the shapes among the rows
            List<RowOfShapes> newRows = new ArrayList<RowOfShapes>(rowSeparations.size() + 1);
            for (int i = 0; i <= rowSeparations.size(); i++) {
                newRows.add(graphicsService.getEmptyRow(sourceImage));
            }

            // add a separator at the beginning and end
            rowSeparations.add(0, 0);
            rowSeparations.add(yInterval + 1);
            for (Shape shape : row.getShapes()) {
                int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft()));
                int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta;
                int firstSepAfterShapeBottom = rowSeparations.size();
                int lastSepBeforeShapeTop = -1;

                for (int i = rowSeparations.size() - 1; i >= 0; i--) {
                    int rowSeparation = rowSeparations.get(i);
                    if (rowSeparation <= topIndex) {
                        lastSepBeforeShapeTop = i;
                        break;
                    }
                }

                for (int i = 0; i < rowSeparations.size(); i++) {
                    int rowSeparation = rowSeparations.get(i);
                    if (rowSeparation >= topIndex + shape.getHeight()) {
                        firstSepAfterShapeBottom = i;
                        break;
                    }
                }

                if (lastSepBeforeShapeTop == firstSepAfterShapeBottom - 1) {
                    // shape clearly belongs to one row
                    RowOfShapes newRow = newRows.get(lastSepBeforeShapeTop);
                    newRow.addShape(shape);
                } else {
                    // is the shape much closer to one row than another?
                    // if yes, add it to then add it to this row
                    int[] yPixelsPerRow = new int[newRows.size()];
                    for (int i = 0; i < newRows.size(); i++) {
                        int separatorTop = rowSeparations.get(i);
                        int separatorBottom = rowSeparations.get(i + 1);
                        int top = topIndex < separatorTop ? separatorTop : topIndex;
                        int bottom = topIndex + shape.getHeight() < separatorBottom
                                ? topIndex + shape.getHeight()
                                : separatorBottom;
                        yPixelsPerRow[i] = bottom - top;
                    }

                    int pixelsInMaxRow = 0;
                    int maxPixelRowIndex = -1;
                    for (int i = 0; i < newRows.size(); i++) {
                        if (yPixelsPerRow[i] > pixelsInMaxRow) {
                            pixelsInMaxRow = yPixelsPerRow[i];
                            maxPixelRowIndex = i;
                        }
                    }
                    double minPercentage = 0.8;
                    if (((double) pixelsInMaxRow / (double) shape.getHeight()) >= minPercentage) {
                        RowOfShapes newRow = newRows.get(maxPixelRowIndex);
                        newRow.addShape(shape);
                    } else {
                        // otherwise, the shape needs to be got rid of
                        // as it's causing massive confusion
                        // do this by simply not adding it anywhere
                    }
                } // is the shape in one row exactly?
            } // next shape
            splitRows.put(row, newRows);
        } // do we have more than one row?
    } // next row

    for (RowOfShapes row : splitRows.keySet()) {
        List<RowOfShapes> newRows = splitRows.get(row);
        sourceImage.replaceRow(row, newRows);
    }
}

From source file:com.joliciel.jochre.graphics.SegmenterImpl.java

/**
 * Groups the shapes of each row in the cluster into words, based on the horizontal spaces
 * between consecutive shapes.
 * <p>
 * The mean and standard deviation of all non-negative spaces are computed first. If twice
 * the standard deviation is below the mean, each row is assumed to hold a single word.
 * Otherwise the statistics are recomputed over the spaces smaller than the mean (taken to
 * be the spaces between letters), and the letter space threshold is set to that mean plus
 * four standard deviations. The threshold is then handed to each row to organise its shapes
 * in groups.
 *
 * @param rowCluster the rows to process
 */
void groupShapesIntoWords(Set<RowOfShapes> rowCluster) {
    LOG.debug("Next row cluster of size " + rowCluster.size());
    // group the shapes together into words
    Mean spaceMean = new Mean();
    StandardDeviation spaceStdDev = new StandardDeviation();
    List<Integer> spaces = new ArrayList<Integer>();

    for (RowOfShapes row : rowCluster) {
        Shape previousShape = null;
        for (Shape shape : row.getShapes()) {
            if (previousShape != null) {
                // distance from the previous shape to this one, in reading direction
                int space = 0;
                if (sourceImage.isLeftToRight())
                    space = shape.getLeft() - previousShape.getRight();
                else
                    space = previousShape.getLeft() - shape.getRight();
                LOG.trace(shape);
                LOG.trace("Space : " + space);
                // negative spaces (overlapping shapes) are left out of the statistics
                if (space >= 0) {
                    spaces.add(space);
                    spaceMean.increment(space);
                    spaceStdDev.increment(space);
                }
            }
            previousShape = shape;
        } // next shape
    }

    double spaceMeanVal = spaceMean.getResult();
    double spaceStdDevVal = spaceStdDev.getResult();
    LOG.debug("Space mean: " + spaceMeanVal);
    LOG.debug("Space std dev: " + spaceStdDevVal);

    // If however there is only a single word on the row, the
    // standard deviation will be very low.
    boolean singleWord = false;
    if (spaceStdDevVal * 2 < spaceMeanVal) {
        LOG.debug("Assuming a single word per row");
        singleWord = true;
    }

    // Since there should be two groups, one for letters and one for words,
    // the mean should be somewhere in between. We now look for the mean on the
    // lesser group and will use it as the basis for comparison.
    spaceMean = new Mean();
    spaceStdDev = new StandardDeviation();
    for (int space : spaces) {
        // every recorded space is already non-negative
        if (space < spaceMeanVal) {
            spaceMean.increment(space);
            spaceStdDev.increment(space);
        }
    }
    spaceMeanVal = spaceMean.getResult();
    spaceStdDevVal = spaceStdDev.getResult();
    LOG.debug("Letter space mean: " + spaceMeanVal);
    LOG.debug("Letter space std dev: " + spaceStdDevVal);

    int letterSpaceThreshold;
    if (singleWord)
        letterSpaceThreshold = Integer.MAX_VALUE;
    else
        letterSpaceThreshold = (int) Math.round(spaceMeanVal + (4.0 * spaceStdDevVal));

    for (RowOfShapes row : rowCluster) {
        LOG.debug(row.toString());
        row.organiseShapesInGroups(letterSpaceThreshold);
    } // next row
}