Example usage for org.apache.commons.math.stat.descriptive.moment Mean clear

List of usage examples for org.apache.commons.math.stat.descriptive.moment Mean clear

Introduction

On this page you can find example usage of org.apache.commons.math.stat.descriptive.moment.Mean.clear().

Prototype

@Override
public void clear() 

Source Link

Usage

From source file: com.joliciel.jochre.graphics.SegmenterImpl.java

/**
 * Looks for shapes that are noticeably wider than the average shape of their row cluster and
 * tries to split them into several shapes, replacing the original shape in its row when a
 * split is found.
 *
 * <p>For each row cluster returned by {@code sourceImage.getRowClusters()} the method derives
 * four parameters and hands them to {@code splitShape}:
 * <ol>
 *   <li>a minimum shape width for a shape to become a split candidate,</li>
 *   <li>a maximum bridge width, derived from the mean of the below-average stroke
 *       thicknesses measured on a sample of shapes,</li>
 *   <li>a minimum letter weight (a quarter of the sample's mean black pixel count),</li>
 *   <li>a maximum horizontal overlap (an eighth of the cluster's mean shape width).</li>
 * </ol>
 *
 * @param sourceImage the image whose rows and shapes are analysed; also supplies the average
 *                    shape width and the separation and black thresholds used here
 * @param fillFactor  scales the width thresholds: each unit adds 0.15 to the multipliers
 *                    applied to the cluster's mean shape width
 */
void splitShapes(SourceImage sourceImage, int fillFactor) {
    LOG.debug("########## splitShapes #########");
    // Cluster rows into rows of a similar height
    // Once we have this, we look for any shapes that are wider than average
    // and attempt to split them by looking for any bridges that are considerably thinner
    // than the stroke thickness and yet have big pixel counts on either side.

    // In order to split, we need four parameters
    // 1) minShapeWidth: the minimum shape width to consider for a split
    // 2) maxBridgeWidth: the maximum bridge width to use as a dividing bridge between two shapes when splitting
    // 3) minLetterWeight: the minimum pixel count that can represent a separate letter when splitting
    // 4) maxHorizontalOverlap: the maximum horizontal overlap between the left-hand and right-hand shape

    // These parameters are different for different font sizes
    // Therefore, we first need to group the rows on the image into clusters by height

    double imageShapeMean = sourceImage.getAverageShapeWidth();
    double maxWidthForSplit = imageShapeMean * 6.0; // avoid splitting horizontal rules!

    Set<Set<RowOfShapes>> rowClusters = sourceImage.getRowClusters();
    for (Set<RowOfShapes> rowCluster : rowClusters) {
        LOG.debug("Analysing row cluster");
        // 1) minShapeWidth: calculate the minimum shape width to be considered for splitting

        // first get the mean
        Mean meanWidth = new Mean();
        for (RowOfShapes row : rowCluster) {
            for (Shape shape : row.getShapes()) {
                meanWidth.increment(shape.getWidth());
            }
        }
        double shapeWidthMean = meanWidth.getResult();
        LOG.debug("Mean width: " + shapeWidthMean);
        // the result has been captured above; reset the accumulator
        meanWidth.clear();

        // Note: there is much trial and error for these numbers
        // but the general guideline is that it is easier to deal downstream
        // with bad joins than with bad splits
        // so we prefer to err on the upper side
        double fillFactorScale = 0.15 * fillFactor;
        double widthForSplittingLower = shapeWidthMean * (1.6 + fillFactorScale);
        double widthForSplittingUpper = shapeWidthMean * (2.2 + fillFactorScale);

        LOG.debug("widthForSplittingLower: " + widthForSplittingLower);
        LOG.debug("widthForSplittingUpper: " + widthForSplittingUpper);
        LOG.debug("maxWidthForSplit: " + maxWidthForSplit);
        List<Shape> candidates = new ArrayList<Shape>();
        for (RowOfShapes row : rowCluster) {
            LOG.debug("Next row " + row.getIndex());
            for (Shape shape : row.getShapes()) {
                LOG.trace("Shape width " + shape.getWidth());
                if (shape.getWidth() > widthForSplittingLower && shape.getWidth() < maxWidthForSplit) {
                    candidates.add(shape);
                    LOG.debug("Found candidate with width " + shape.getWidth() + ": " + shape);
                }
            }
        }

        if (candidates.size() > 0) {
            // we'll take a random sampling of shapes for the next parameters
            int sampleSize = 30;
            List<Shape> sample = this.getSample(rowCluster, sampleSize, true);

            Mean meanPixelCount = new Mean();
            Vectorizer vectorizer = this.graphicsService.getVectorizer();
            List<Integer> thicknesses = new ArrayList<Integer>();
            for (Shape shape : sample) {
                BitSet bitset = shape.getBlackAndWhiteBitSet(sourceImage.getSeparationThreshold(), 0);
                meanPixelCount.increment(bitset.cardinality());
                List<LineSegment> vectors = vectorizer.vectorize(shape);

                int height = shape.getHeight();
                // Divide as a double so that Math.ceil actually rounds up: with integer
                // division the quotient was truncated first, giving a step of 0 for any
                // shape less than 8 pixels high.
                int sampleStep = (int) Math.ceil(height / 8.0);

                for (LineSegment vector : vectors) {
                    List<Integer> vectorThickness = vector.getLineDefinition().findArrayListThickness(shape,
                            vector.getStartX(), vector.getStartY(), vector.getLength(),
                            sourceImage.getSeparationThreshold(), 0, sampleStep);
                    thicknesses.addAll(vectorThickness);
                }

            }

            double pixelCountMean = meanPixelCount.getResult();

            // first pass: mean of all sampled thicknesses
            Mean meanThickness = new Mean();
            for (int thickness : thicknesses) {
                meanThickness.increment(thickness);
            }
            double thicknessMean = meanThickness.getResult();

            // second pass: mean of only those thicknesses strictly below the first mean
            // NOTE(review): if no sample is strictly below the mean (all samples equal, or no
            // samples at all) nothing is incremented here - confirm the resulting value of
            // getResult() is acceptable for the bridge widths computed below.
            meanThickness = new Mean();
            for (int thickness : thicknesses) {
                if (thickness < thicknessMean) {
                    meanThickness.increment(thickness);
                }
            }

            thicknessMean = meanThickness.getResult();
            LOG.debug("thicknessMean: " + thicknessMean);

            // 2) maxBridgeWidth: the maximum bridge width to use as a dividing bridge between two shapes when splitting
            double maxBridgeWidthLower = thicknessMean * 0.5;
            double maxBridgeWidthUpper = thicknessMean * 0.8;
            LOG.debug("maxBridgeWidthLower: " + maxBridgeWidthLower);
            LOG.debug("maxBridgeWidthUpper: " + maxBridgeWidthUpper);

            // 3) minLetterWeight: the minimum pixel count that can represent a separate letter when splitting
            int minLetterWeight = (int) Math.floor(pixelCountMean / 4.0);
            LOG.debug("minLetterWeight: " + minLetterWeight);

            // 4) maxHorizontalOverlap: the maximum horizontal overlap between the left-hand and right-hand shape
            int maxOverlap = (int) Math.ceil(shapeWidthMean / 8.0);
            LOG.debug("maxOverlap: " + maxOverlap);

            Map<Shape, List<Shape>> shapesToSplit = new Hashtable<Shape, List<Shape>>();
            for (Shape candidate : candidates) {
                LOG.debug("Trying to split candidate " + candidate);
                // dump the candidate as text, one log line per pixel row:
                // "M" marks the mean line, "B" the base line, otherwise the row index,
                // followed by "x" for black pixels and "o" for the rest
                for (int y = 0; y < candidate.getHeight(); y++) {
                    StringBuilder line = new StringBuilder();
                    if (y == candidate.getMeanLine()) {
                        line.append("M");
                    } else if (y == candidate.getBaseLine()) {
                        line.append("B");
                    } else {
                        line.append(y);
                    }
                    for (int x = 0; x < candidate.getWidth(); x++) {
                        if (candidate.isPixelBlack(x, y, sourceImage.getBlackThreshold())) {
                            line.append("x");
                        } else {
                            line.append("o");
                        }
                    }
                    LOG.debug(line.toString());
                }
                // skip shapes whose height is less than three times the upper bridge width
                if (candidate.getHeight() < 3.0 * maxBridgeWidthUpper) {
                    LOG.debug("Shape too narrow - probably a long dash.");
                    continue;
                }
                int maxBridgeWidth;
                if (candidate.getWidth() > widthForSplittingUpper) {
                    maxBridgeWidth = (int) Math.ceil(maxBridgeWidthUpper);
                } else {
                    // since many bridges are thicker than expected
                    // add a rule that the thicker the bridge is, the wider the image needs to be
                    // (linear interpolation between the lower and upper bridge widths,
                    // driven by where the candidate width sits between the two width thresholds)
                    maxBridgeWidth = (int) Math.ceil(
                            maxBridgeWidthLower + (((double) candidate.getWidth() - widthForSplittingLower)
                                    / (widthForSplittingUpper - widthForSplittingLower)
                                    * (maxBridgeWidthUpper - maxBridgeWidthLower)));
                }
                List<Shape> splitShapes = this.splitShape(candidate, sourceImage, maxBridgeWidth,
                        minLetterWeight, maxOverlap);
                if (splitShapes.size() > 1) {
                    LOG.debug("Split found");
                    for (Shape splitShape : splitShapes) {
                        splitShape.setRow(candidate.getRow());
                    }
                    shapesToSplit.put(candidate, splitShapes);
                }
            }

            LOG.debug("Replacing shapes with split shapes");
            List<RowOfShapes> rowsToReorder = new ArrayList<RowOfShapes>();
            for (Map.Entry<Shape, List<Shape>> entry : shapesToSplit.entrySet()) {
                Shape shape = entry.getKey();
                List<Shape> newShapes = entry.getValue();
                RowOfShapes row = shape.getRow();
                row.removeShape(shape);
                row.addShapes(newShapes);
                rowsToReorder.add(row);
            }

            for (RowOfShapes row : rowsToReorder) {
                row.reorderShapes();
            }
        }
    }
    LOG.debug("splitShapes complete");
}