Example usage for the org.apache.commons.math.stat.descriptive.DescriptiveStatistics constructor DescriptiveStatistics()

List of usage examples for the org.apache.commons.math.stat.descriptive.DescriptiveStatistics constructor DescriptiveStatistics()

Introduction

On this page you can find example usages of the no-argument constructor DescriptiveStatistics() of org.apache.commons.math.stat.descriptive.DescriptiveStatistics.

Prototype

public DescriptiveStatistics() 

Source Link

Document

Construct a DescriptiveStatistics instance with an infinite window

Usage

From source file:com.joliciel.jochre.graphics.SourceImageImpl.java

/**
 * Computes, once, the typical shape dimensions of this image from the widths
 * and heights of every shape in every row.
 * <p>
 * The 65th percentile is stored as the "average" width/height, and half the
 * distance between the 50th and 80th percentiles is stored as the margin.
 * Subsequent calls return immediately.
 */
void calculateShapeStatistics() {
    if (shapeStatisticsCalculated) {
        return;
    }

    DescriptiveStatistics widths = new DescriptiveStatistics();
    DescriptiveStatistics heights = new DescriptiveStatistics();
    for (RowOfShapes row : this.getRows()) {
        for (Shape shape : row.getShapes()) {
            widths.addValue(shape.getWidth());
            heights.addValue(shape.getHeight());
        }
    }

    // spread between the lower (50th) and upper (80th) percentile bounds
    double widthSpread = widths.getPercentile(80) - widths.getPercentile(50);
    double heightSpread = heights.getPercentile(80) - heights.getPercentile(50);

    this.averageShapeWidth = widths.getPercentile(65);
    this.averageShapeHeight = heights.getPercentile(65);
    this.averageShapeWidthMargin = widthSpread / 2.0;
    this.averageShapeHeightMargin = heightSpread / 2.0;

    this.shapeStatisticsCalculated = true;
}

From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.heatmaps.HeatMapTask.java

/**
 * Scales the matrix in place: every value of each first-level array is
 * divided by the standard deviation of that array's finite values.
 * <p>
 * An array is left untouched when no usable standard deviation exists, i.e.
 * when it is zero (constant or single finite value) or NaN (no finite value
 * at all). Dividing by NaN would otherwise overwrite the whole array with NaN.
 *
 * @param peakList the matrix to scale; modified in place
 */
private void scale(double[][] peakList) {
    DescriptiveStatistics stdDevStats = new DescriptiveStatistics();

    for (double[] values : peakList) {
        // Only finite values contribute to the standard deviation.
        stdDevStats.clear();
        for (double value : values) {
            if (!Double.isInfinite(value) && !Double.isNaN(value)) {
                stdDevStats.addValue(value);
            }
        }

        double stdDev = stdDevStats.getStandardDeviation();
        if (stdDev == 0 || Double.isNaN(stdDev)) {
            // Nothing meaningful to divide by: keep the values as they are.
            continue;
        }

        for (int i = 0; i < values.length; i++) {
            values[i] = values[i] / stdDev;
        }
    }
}

From source file:com.joliciel.jochre.graphics.RowOfShapesImpl.java

/**
 * Computes, once, the typical shape dimensions of this row from the widths
 * and heights of all of its shapes.
 * <p>
 * The median (50th percentile) is stored as the "average" width/height, and
 * half the distance between the 33rd and 66th percentiles is stored as the
 * margin. Subsequent calls return immediately.
 */
void calculateShapeStatistics() {
    if (shapeStatisticsCalculated) {
        return;
    }

    DescriptiveStatistics widths = new DescriptiveStatistics();
    DescriptiveStatistics heights = new DescriptiveStatistics();
    for (Shape shape : this.getShapes()) {
        widths.addValue(shape.getWidth());
        heights.addValue(shape.getHeight());
    }

    // spread between the lower (33rd) and upper (66th) percentile bounds
    double widthSpread = widths.getPercentile(66) - widths.getPercentile(33);
    double heightSpread = heights.getPercentile(66) - heights.getPercentile(33);

    this.averageShapeWidth = widths.getPercentile(50);
    this.averageShapeHeight = heights.getPercentile(50);
    this.averageShapeWidthMargin = widthSpread / 2.0;
    this.averageShapeHeightMargin = heightSpread / 2.0;

    this.shapeStatisticsCalculated = true;
}

From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.heatmaps.HeatMapTask.java

/**
 * Builds the heat-map matrix for a grouped comparison against a reference
 * group.
 * <p>
 * Raw data files are grouped by the string form of their value for
 * {@code selectedParameter}. For every included peak-list row and every
 * non-reference group, the cell value is the mean area (or height, depending
 * on the {@code area} flag) of the group's peaks divided by the mean of the
 * reference group's peaks, optionally log-transformed and scaled.
 * <p>
 * As side effects this method fills {@code pValueMatrix}, {@code rowNames}
 * and {@code colNames}.
 *
 * @param selectedParameter the project parameter whose values define the groups
 * @param referenceGroup    the group value used as the reference (control)
 * @return a matrix indexed as {@code [non-reference group][included row]}
 */
private double[][] groupingDataset(UserParameter selectedParameter, String referenceGroup) {
    DescriptiveStatistics meanControlStats = new DescriptiveStatistics();
    DescriptiveStatistics meanGroupStats = new DescriptiveStatistics();

    // Collect all data files
    List<RawDataFile> allDataFiles = Arrays.asList(peakList.getRawDataFiles());

    // Determine the reference group and non reference group (the rest of
    // the samples) for raw data files, and the distinct groups in
    // encounter order
    List<RawDataFile> referenceDataFiles = new ArrayList<RawDataFile>();
    List<RawDataFile> nonReferenceDataFiles = new ArrayList<RawDataFile>();
    List<String> groups = new ArrayList<String>();
    MZmineProject project = MZmineCore.getCurrentProject();

    for (RawDataFile rawDataFile : allDataFiles) {
        String fileGroup = String.valueOf(project.getParameterValue(selectedParameter, rawDataFile));
        if (!groups.contains(fileGroup)) {
            groups.add(fileGroup);
        }
        if (fileGroup.equals(referenceGroup)) {
            referenceDataFiles.add(rawDataFile);
        } else {
            nonReferenceDataFiles.add(rawDataFile);
        }
    }

    // Count the rows that will appear in the heat map
    int numRows = 0;
    for (int row = 0; row < peakList.getNumberOfRows(); row++) {
        if (!onlyIdentified || peakList.getRow(row).getPeakIdentities().length > 0) {
            numRows++;
        }
    }

    // One matrix line per non-reference group. The reference group is only
    // subtracted when it is actually present; otherwise the column index
    // would run past the end of the matrix.
    int numGroups = groups.contains(referenceGroup) ? groups.size() - 1 : groups.size();
    double[][] dataMatrix = new double[numGroups][numRows];
    pValueMatrix = new String[numGroups][numRows];

    // data files that should be in the heat map, with the group of each
    // one resolved once instead of once per row and group
    List<RawDataFile> shownDataFiles = nonReferenceDataFiles;
    List<String> shownDataFileGroups = new ArrayList<String>(shownDataFiles.size());
    for (RawDataFile shownDataFile : shownDataFiles) {
        shownDataFileGroups.add(String.valueOf(project.getParameterValue(selectedParameter, shownDataFile)));
    }

    for (int row = 0, rowIndex = 0; row < peakList.getNumberOfRows(); row++) {
        PeakListRow rowPeak = peakList.getRow(row);
        if (onlyIdentified && rowPeak.getPeakIdentities().length == 0) {
            continue;
        }

        // Average area or height of the reference group
        meanControlStats.clear();
        for (RawDataFile referenceDataFile : referenceDataFiles) {
            if (rowPeak.getPeak(referenceDataFile) != null) {
                if (area) {
                    meanControlStats.addValue(rowPeak.getPeak(referenceDataFile).getArea());
                } else {
                    meanControlStats.addValue(rowPeak.getPeak(referenceDataFile).getHeight());
                }
            }
        }

        // Divide the average area or height of each group by the average
        // of the area or height of the reference peaks in each row
        int columnIndex = 0;
        for (String group : groups) {
            if (group.equals(referenceGroup)) {
                continue;
            }

            meanGroupStats.clear();
            for (int dataColumn = 0; dataColumn < shownDataFiles.size(); dataColumn++) {
                Feature peak = rowPeak.getPeak(shownDataFiles.get(dataColumn));
                if (peak == null || !shownDataFileGroups.get(dataColumn).equals(group)) {
                    continue;
                }

                // Validate the value that is actually added (the original
                // always tested the area, even when heights were used).
                double peakValue = area ? peak.getArea() : peak.getHeight();
                if (!Double.isInfinite(peakValue) && !Double.isNaN(peakValue)) {
                    meanGroupStats.addValue(peakValue);
                }
            }

            double value = meanGroupStats.getMean() / meanControlStats.getMean();
            if (meanGroupStats.getN() > 1 && meanControlStats.getN() > 1) {
                pValueMatrix[columnIndex][rowIndex] = this.getPvalue(meanGroupStats, meanControlStats);
            } else {
                pValueMatrix[columnIndex][rowIndex] = "";
            }

            if (log) {
                value = Math.log(value);
            }
            dataMatrix[columnIndex++][rowIndex] = value;
        }
        rowIndex++;
    }

    // Scale the data dividing the peak area/height by the standard
    // deviation of each column
    if (scale) {
        scale(dataMatrix);
    }

    // Create two arrays: row and column names. The row count is taken from
    // numRows rather than dataMatrix[0], which does not exist when there is
    // no non-reference group.
    rowNames = new String[numRows];
    colNames = new String[numGroups];

    int columnIndex = 0;
    for (String group : groups) {
        if (!group.equals(referenceGroup)) {
            colNames[columnIndex++] = group;
        }
    }
    for (int row = 0, rowIndex = 0; row < peakList.getNumberOfRows(); row++) {
        if (!onlyIdentified || peakList.getRow(row).getPeakIdentities().length > 0) {
            if (peakList.getRow(row).getPeakIdentities() != null
                    && peakList.getRow(row).getPeakIdentities().length > 0) {
                rowNames[rowIndex++] = peakList.getRow(row).getPreferredPeakIdentity().getName();
            } else {
                rowNames[rowIndex++] = "Unknown";
            }
        }
    }

    return dataMatrix;
}

From source file:com.joliciel.jochre.graphics.RowOfShapesImpl.java

/**
 * The regression passes through the bottom of average shapes on this line.
 * It gives the line's slope, and a starting point for finding the baseline and meanline.
 * <p>
 * The regression is built on first access and cached. Rows with fewer than
 * ten average-sized shapes, or narrower than a sixth of the container width,
 * are treated as a straight horizontal line through the mean bottom of their
 * shapes.
 */
public SimpleRegression getRegression() {
    if (this.regression != null) {
        return regression;
    }

    // First pass: fit a line through the bottom mid-points of all shapes of
    // "average" size, to see whether the row rises or falls.
    regression = new SimpleRegression();

    final int minShapes = 10;
    DescriptiveStatistics widthStats = new DescriptiveStatistics();
    DescriptiveStatistics heightStats = new DescriptiveStatistics();
    for (Shape shape : this.getShapes()) {
        widthStats.addValue(shape.getWidth());
        heightStats.addValue(shape.getHeight());
    }

    // "average" means within the inter-quartile range for both dimensions
    double minWidth = widthStats.getPercentile(25);
    double maxWidth = widthStats.getPercentile(75);
    double minHeight = heightStats.getPercentile(25);
    double maxHeight = heightStats.getPercentile(75);

    int numShapes = 0;
    for (Shape shape : this.getShapes()) {
        // leave out commas, etc.
        boolean averageSized = shape.getWidth() >= minWidth && shape.getWidth() <= maxWidth
                && shape.getHeight() >= minHeight && shape.getHeight() <= maxHeight;
        if (!averageSized) {
            continue;
        }

        // bottom only, since rows with different font sizes tend to align bottom
        double midX = ((double) shape.getLeft() + (double) shape.getRight()) / 2.0;
        regression.addData(midX, (double) shape.getBottom());
        numShapes++;
    }

    // Special case: very few shapes (generally letter or number + period)
    // or a very narrow row give an unreliable slope.
    boolean horizontalLine = false;
    if (numShapes < minShapes) {
        LOG.debug("Too few shapes: " + numShapes + ", assuming straight horizontal line");
        horizontalLine = true;
    } else if ((this.getRight() - this.getLeft()) < (this.getContainer().getWidth() / 6.0)) {
        LOG.debug("Too narrow: " + (this.getRight() - this.getLeft())
                + ", assuming straight horizontal line");
        horizontalLine = true;
    }

    if (horizontalLine) {
        // Replace the fitted line by a flat one at the mean bottom of the
        // average-sized shapes (or of all shapes if none qualifies).
        Mean bottomMean = new Mean();
        for (Shape shape : this.getShapes()) {
            boolean averageSized = shape.getWidth() >= minWidth && shape.getWidth() <= maxWidth
                    && shape.getHeight() >= minHeight && shape.getHeight() <= maxHeight;
            if (averageSized) {
                bottomMean.increment((double) shape.getBottom());
            }
        }
        if (bottomMean.getN() == 0) {
            for (Shape shape : this.getShapes()) {
                bottomMean.increment((double) shape.getBottom());
            }
        }

        double flatY = bottomMean.getResult();
        regression = new SimpleRegression();
        regression.addData(this.getLeft(), flatY);
        regression.addData(this.getRight(), flatY);
    }

    // Trace the resulting line: intercept, slope, slope standard error and
    // the predicted y at both edges of the container.
    LOG.debug("intercept: " + regression.getIntercept());
    LOG.debug("slope: " + regression.getSlope());
    LOG.debug("std err: " + regression.getSlopeStdErr());
    LOG.debug("x = 0, y = " + regression.predict(0));
    LOG.debug("x = " + this.getContainer().getWidth() + ", y = "
            + regression.predict(this.getContainer().getWidth()));

    return regression;
}

From source file:com.joliciel.jochre.graphics.SourceImageImpl.java

/**
 * Finds the white (empty) rectangles inside the bounding box of the given
 * shapes, in four passes with different minimum sizes expressed as multiples
 * of the 75th-percentile shape width and height: wide horizontal areas, long
 * vertical areas, square areas, and long narrow vertical areas.
 *
 * @param shapes the shapes delimiting and populating the area to examine
 * @return all white areas found, in the order of the passes above
 */
@Override
public List<Rectangle> getWhiteAreas(List<Shape> shapes) {
    LOG.debug("#### getWhiteAreas ####");

    // Bounding box of the shapes, and their width/height distributions
    int top = Integer.MAX_VALUE;
    int bottom = 0;
    int left = Integer.MAX_VALUE;
    int right = 0;
    DescriptiveStatistics widthStats = new DescriptiveStatistics();
    DescriptiveStatistics heightStats = new DescriptiveStatistics();
    for (Shape shape : shapes) {
        top = Math.min(top, shape.getTop());
        bottom = Math.max(bottom, shape.getBottom());
        left = Math.min(left, shape.getLeft());
        right = Math.max(right, shape.getRight());

        widthStats.addValue(shape.getWidth());
        heightStats.addValue(shape.getHeight());
    }

    double averageShapeWidth = widthStats.getPercentile(75);
    double averageShapeHeight = heightStats.getPercentile(75);
    LOG.debug("averageShapeWidth: " + averageShapeWidth);
    LOG.debug("averageShapeHeight: " + averageShapeHeight);

    WhiteAreaFinder finder = new WhiteAreaFinder();
    List<Rectangle> found = new ArrayList<Rectangle>();
    List<Rectangle> obstacles = new ArrayList<Rectangle>(shapes);

    // Pass 1: horizontal white areas
    double horizontalMinWidth = 40.0 * averageShapeWidth;
    double horizontalMinHeight = 2.5 * averageShapeHeight;
    LOG.debug("minHorizontalWhiteAreaWidth: " + horizontalMinWidth);
    LOG.debug("minHorizontalWhiteAreaHeight: " + horizontalMinHeight);
    List<Rectangle> horizontalAreas = finder.getWhiteAreas(obstacles, left, top, right, bottom,
            horizontalMinWidth, horizontalMinHeight);
    // Horizontal areas become obstacles, so that vertical white areas at the
    // page top & bottom are not detected and do not split a valid row.
    obstacles.addAll(horizontalAreas);
    found.addAll(horizontalAreas);

    // Pass 2: long vertical white areas (not added to the obstacles)
    double verticalMinWidth = 2.5 * averageShapeWidth;
    double verticalMinHeight = 10.0 * averageShapeHeight;
    LOG.debug("minVerticalWhiteAreaWidth: " + verticalMinWidth);
    LOG.debug("minVerticalWhiteAreaHeight: " + verticalMinHeight);
    found.addAll(finder.getWhiteAreas(obstacles, left, top, right, bottom,
            verticalMinWidth, verticalMinHeight));

    // Pass 3: square white areas
    double squareMinWidth = 4.0 * averageShapeWidth;
    double squareMinHeight = 4.0 * averageShapeHeight;
    LOG.debug("minSquareWhiteAreaWidth: " + squareMinWidth);
    LOG.debug("minSquareWhiteAreaHeight: " + squareMinHeight);
    List<Rectangle> squareAreas = finder.getWhiteAreas(obstacles, left, top, right, bottom,
            squareMinWidth, squareMinHeight);
    found.addAll(squareAreas);
    obstacles.addAll(squareAreas);
    obstacles.addAll(this.getWhiteAreasAroundLargeShapes(shapes));

    // Pass 4: long narrow vertical white areas
    double narrowMinWidth = 1.0 * averageShapeWidth;
    double narrowMinHeight = 20.0 * averageShapeHeight;
    LOG.debug("minVerticalWhiteAreaWidth: " + narrowMinWidth);
    LOG.debug("minVerticalWhiteAreaHeight: " + narrowMinHeight);
    found.addAll(finder.getWhiteAreas(obstacles, left, top, right, bottom,
            narrowMinWidth, narrowMinHeight));

    return found;
}

From source file:com.joliciel.csvLearner.CSVLearner.java

/**
 * Runs the "evaluate" command: trains a maxent model on the events read from
 * the CSV feature files and writes f-scores and outcomes to CSV files in the
 * output directory.
 * <p>
 * Without cross validation a single model is trained and evaluated. With
 * cross validation ten runs are performed, each using every tenth event
 * (optionally counted per outcome when outcomes are balanced) as test data,
 * and per-outcome precision/recall/f-score means and standard deviations are
 * appended to the f-score file.
 * <p>
 * Both output writers are always closed, even if opening the second one or
 * closing the first one fails.
 *
 * @throws IOException      if an output file cannot be written
 * @throws RuntimeException if a required argument is missing or arguments
 *                          are combined inconsistently
 */
private void doCommandEvaluate() throws IOException {
    if (resultFilePath == null) {
        throw new RuntimeException("Missing argument: resultFile");
    }
    if (featureDir == null) {
        throw new RuntimeException("Missing argument: featureDir");
    }
    if (testIdFilePath != null) {
        if (crossValidation) {
            throw new RuntimeException("Cannot combine testIdFile with cross validation");
        }
        if (testSegment >= 0) {
            throw new RuntimeException("Cannot combine testIdFile with test segment");
        }
    }
    if (!crossValidation && testIdFilePath == null) {
        if (testSegment < 0) {
            throw new RuntimeException("Missing argument: testSegment");
        }
        if (testSegment > 9) {
            throw new RuntimeException("testSegment must be an integer between 0 and 9");
        }
    }
    if (outDirPath == null) {
        throw new RuntimeException("Missing argument: outDir");
    }

    LOG.info("Generating event list from CSV files...");
    CSVEventListReader reader = this.getReader(TrainingSetType.TEST_SEGMENT, false);

    GenericEvents events = reader.getEvents();

    File outDir = new File(outDirPath);
    outDir.mkdirs();
    // Derive a file-system friendly base name from the feature directory
    String fileBase = this.featureDir.replace('/', '_');
    fileBase = fileBase.replace(':', '_');
    fileBase = fileBase + "_cutoff" + cutoff;

    if (generateEventFile) {
        File eventFile = new File(outDir, fileBase + "_events.txt");
        this.generateEventFile(eventFile, events);
    }

    File fscoreFile = new File(outDir, fileBase + "_fscores.csv");
    File outcomeFile = new File(outDir, fileBase + "_outcomes.csv");

    Writer fscoreFileWriter = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(fscoreFile, false), "UTF8"));
    try {
        Writer outcomeFileWriter = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(outcomeFile, false), "UTF8"));
        try {
            if (!crossValidation) {
                MaxentModel maxentModel = this.train(events, null);

                this.evaluate(maxentModel, events, fscoreFileWriter, outcomeFileWriter);
            } else {
                final int segmentCount = 10;
                DescriptiveStatistics accuracyStats = new DescriptiveStatistics();
                // per outcome: [0] precision, [1] recall, [2] f-score
                Map<String, DescriptiveStatistics[]> outcomeFscoreStats = new TreeMap<String, DescriptiveStatistics[]>();
                for (int segment = 0; segment < segmentCount; segment++) {
                    outcomeFileWriter.write("Run " + segment + ",\n");
                    fscoreFileWriter.write("Run " + segment + ",\n");

                    // Mark every tenth event as test data for this run
                    if (balanceOutcomes) {
                        // counted separately per outcome, so that each
                        // outcome is spread evenly over the segments
                        for (String outcome : reader.getOutcomes()) {
                            int i = 0;
                            for (GenericEvent event : events) {
                                if (event.getOutcome().equals(outcome)) {
                                    event.setTest(i % segmentCount == segment);
                                    i++;
                                }
                            }
                        }
                    } else {
                        int i = 0;
                        for (GenericEvent event : events) {
                            event.setTest(i % segmentCount == segment);
                            i++;
                        }
                    }

                    MaxentModel maxentModel = this.train(events, null);
                    FScoreCalculator<String> fscoreCalculator = this.evaluate(maxentModel, events,
                            fscoreFileWriter, outcomeFileWriter);

                    accuracyStats.addValue(fscoreCalculator.getTotalFScore());
                    for (String outcome : fscoreCalculator.getOutcomeSet()) {
                        DescriptiveStatistics[] stats = outcomeFscoreStats.get(outcome);
                        if (stats == null) {
                            stats = new DescriptiveStatistics[] { new DescriptiveStatistics(),
                                    new DescriptiveStatistics(), new DescriptiveStatistics() };
                            outcomeFscoreStats.put(outcome, stats);
                        }
                        stats[0].addValue(fscoreCalculator.getPrecision(outcome));
                        stats[1].addValue(fscoreCalculator.getRecall(outcome));
                        stats[2].addValue(fscoreCalculator.getFScore(outcome));
                    } // next outcome

                    outcomeFileWriter.write("\n");
                } // next segment

                fscoreFileWriter.write(
                        "outcome,precision avg., precision dev., recall avg., recall dev., f-score avg., f-score dev.,\n");
                for (Map.Entry<String, DescriptiveStatistics[]> entry : outcomeFscoreStats.entrySet()) {
                    DescriptiveStatistics[] stats = entry.getValue();
                    fscoreFileWriter.write(CSVFormatter.format(entry.getKey()) + ","
                            + CSVFormatter.format(stats[0].getMean()) + ","
                            + CSVFormatter.format(stats[0].getStandardDeviation()) + ","
                            + CSVFormatter.format(stats[1].getMean()) + ","
                            + CSVFormatter.format(stats[1].getStandardDeviation()) + ","
                            + CSVFormatter.format(stats[2].getMean()) + ","
                            + CSVFormatter.format(stats[2].getStandardDeviation()) + "," + "\n");
                }
                fscoreFileWriter.write("TOTAL,,,,," + CSVFormatter.format(accuracyStats.getMean()) + ","
                        + CSVFormatter.format(accuracyStats.getStandardDeviation()) + ",\n");

                LOG.info("Accuracy mean: " + accuracyStats.getMean());
                LOG.info("Accuracy std dev: " + accuracyStats.getStandardDeviation());
            }
        } finally {
            // close() flushes the buffered writer before closing it
            outcomeFileWriter.close();
        }
    } finally {
        fscoreFileWriter.close();
    }

    LOG.info("#### Complete ####");
}

From source file:com.joliciel.jochre.graphics.RowOfShapesImpl.java

/**
 * Assign guidelines for a certain subset of shapes, and return the x-height.
 * <p>
 * A histogram of black pixels per pixel row (relative to a sloped mid-line
 * through the row) is built from the selected shapes. The mean-line is
 * searched for in the part of the histogram above the mid-line and the
 * base-line in the part below it, and both are then stored on every selected
 * shape, relative to the shape's top.
 *
 * @param groupsToAssign the groups whose shapes should receive guidelines;
 *        if <code>null</code>, all shapes of this row are used
 * @return the x-height, i.e. the base-line index minus the mean-line index
 */
int assignGuideLines(List<GroupOfShapes> groupsToAssign) {
    LOG.debug("assignGuideLines internal");
    double meanHorizontalSlope = this.getContainer().getMeanHorizontalSlope();

    // the base-line and mean-line will be at a fixed distance away from the midpoint
    // the question is, which distance!
    // To find this out, we count number of black pixels on each row above this line
    // And then start analysing from the top and the bottom until the number drops off sharply

    // The notion of "groupsToAssign" is used to only assign guidelines
    // to a subset of the groups on the line
    // when the line contains two different font sizes
    List<Shape> shapes = new ArrayList<Shape>();
    if (groupsToAssign != null) {
        for (GroupOfShapes group : groupsToAssign) {
            shapes.addAll(group.getShapes());
        }
    } else {
        shapes = this.getShapes();
    }

    int i = 0;
    DescriptiveStatistics shapeWidthStats = new DescriptiveStatistics();
    DescriptiveStatistics shapeHeightStats = new DescriptiveStatistics();

    // NOTE(review): the size statistics are taken over all shapes of the row,
    // not only over the selected subset - confirm this is intended.
    for (Shape shape : this.getShapes()) {
        shapeWidthStats.addValue(shape.getWidth());
        shapeHeightStats.addValue(shape.getHeight());
    }

    double minWidth = shapeWidthStats.getPercentile(25);
    double maxWidth = shapeWidthStats.getPercentile(75);
    double minHeight = shapeHeightStats.getPercentile(45);
    double maxHeight = shapeHeightStats.getPercentile(75);

    double rowMidPointX = (double) (this.getLeft() + this.getRight()) / 2.0;

    // calculating the Y midpoint by the shapes in the row, instead of by the top & bottom of row
    Mean rowMidPointYMean = new Mean();
    for (Shape shape : this.getShapes()) {
        // only add points whose shape is of "average" width and height (to leave out commas, etc.)
        if (shape.getWidth() >= minWidth && shape.getWidth() <= maxWidth && shape.getHeight() >= minHeight
                && shape.getHeight() <= maxHeight) {
            rowMidPointYMean.increment((double) (shape.getBottom() + shape.getTop()) / 2.0);
        }
    }

    // fall back to the geometric middle of the row if no shape was "average"
    double rowMidPointY = (double) (this.getTop() + this.getBottom()) / 2.0;
    if (rowMidPointYMean.getN() > 0)
        rowMidPointY = rowMidPointYMean.getResult();
    LOG.debug("rowMidPointX: " + rowMidPointX);
    LOG.debug("rowMidPointY: " + rowMidPointY);

    // figure out where the top-most shape starts and the bottom-most shape ends, relative to the y midline
    int minTop = Integer.MAX_VALUE;
    int maxBottom = Integer.MIN_VALUE;
    // mid-line y at each selected shape's horizontal centre, in the same order as "shapes"
    List<Integer> rowYMidPoints = new ArrayList<Integer>(shapes.size());
    for (Shape shape : shapes) {
        double shapeMidPointX = (double) (shape.getLeft() + shape.getRight()) / 2.0;
        int shapeMidPointY = (int) Math
                .round(rowMidPointY + (meanHorizontalSlope * (shapeMidPointX - rowMidPointX)));
        rowYMidPoints.add(shapeMidPointY);

        int relativeTop = shape.getTop() - shapeMidPointY;
        int relativeBottom = shape.getBottom() - shapeMidPointY;

        if (relativeTop < minTop)
            minTop = relativeTop;
        if (relativeBottom > maxBottom)
            maxBottom = relativeBottom;
    }
    // the interval always includes the mid-line itself
    if (minTop > 0)
        minTop = 0;
    if (maxBottom < 0)
        maxBottom = 0;

    // histogram layout: index yIntervalTop corresponds to the mid-line
    int yIntervalTop = 0 - minTop;
    int yIntervalBottom = maxBottom;
    int yInterval = yIntervalTop + 1 + yIntervalBottom;
    LOG.debug("yIntervalTop: " + yIntervalTop);
    LOG.debug("yIntervalBottom: " + yIntervalBottom);
    LOG.debug("yInterval: " + yInterval);
    int[] pixelCounts = new int[yInterval];

    // Get the pixel count for each row
    // examining one shape at a time to limit ourselves to the pixels that are
    // actually considered to be in this row
    int blackThreshold = this.getContainer().getSeparationThreshold();
    int shapeIndex = 0;
    int shapeCount = 0;
    for (Shape shape : shapes) {
        // shapes below the 45th height percentile do not contribute to the histogram
        if (shape.getHeight() >= minHeight) {
            LOG.trace(shape.toString());
            shapeCount++;
            int shapeMidPointY = rowYMidPoints.get(shapeIndex);
            int zeroLine = shapeMidPointY - yIntervalTop;
            int topIndex = shape.getTop() - zeroLine;
            for (int x = 0; x < shape.getWidth(); x++) {
                for (int y = 0; y < shape.getHeight(); y++) {
                    int yIndex = topIndex + y;
                    if (yIndex >= 0 && yIndex < pixelCounts.length
                            && shape.isPixelBlack(x, y, blackThreshold)) {
                        pixelCounts[yIndex]++;
                    }
                }
            }
        }
        shapeIndex++;
    }
    LOG.debug("Got pixels from " + shapeCount + " shapes.");

    boolean notEnoughShapes = shapeCount < 3;
    LOG.debug("notEnoughShapes? " + notEnoughShapes);

    // We start at the top
    // As soon as we reach a line with more pixels than the mean, we assume this is the mean-line
    Mean pixelCountMeanTop = new Mean();
    StandardDeviation pixelCountStdDevTop = new StandardDeviation();
    for (i = 0; i <= yIntervalTop; i++) {
        pixelCountMeanTop.increment(pixelCounts[i]);
        pixelCountStdDevTop.increment(pixelCounts[i]);
    }
    LOG.debug("Top: pixel count mean: " + pixelCountMeanTop.getResult() + ", std dev: "
            + pixelCountStdDevTop.getResult());

    // the threshold is halved when fewer than three shapes contributed pixels
    double threshold = pixelCountMeanTop.getResult() * 1.1;
    if (notEnoughShapes) {
        threshold = threshold / 2.0;
    }
    double lowerThreshold = threshold / 2.0;

    LOG.debug("Top threshold: " + threshold);
    LOG.debug("Top lowerThreshold: " + lowerThreshold);

    // The search is re-armed whenever the count drops below the lower
    // threshold, so the candidate recorded last (scanning downwards from the
    // top towards the mid-line) is the one kept.
    int meanLine = 0;
    boolean findMeanLine = true;
    for (i = 0; i <= yIntervalTop; i++) {
        int pixelCount = pixelCounts[i];
        if (findMeanLine && pixelCount > threshold) {
            meanLine = i;
            findMeanLine = false;
        } else if (!findMeanLine && pixelCount < lowerThreshold) {
            findMeanLine = true;
        }
    }

    // We start at the bottom
    // As soon as we reach a line with more pixels than the mean, we assume this is the base-line

    Mean pixelCountMeanBottom = new Mean();
    StandardDeviation pixelCountStdDevBottom = new StandardDeviation();
    for (i = pixelCounts.length - 1; i >= yIntervalTop; i--) {
        pixelCountMeanBottom.increment(pixelCounts[i]);
        pixelCountStdDevBottom.increment(pixelCounts[i]);
    }
    LOG.debug("Bottom: pixel count mean: " + pixelCountMeanBottom.getResult() + ", std dev: "
            + pixelCountStdDevBottom.getResult());

    threshold = pixelCountMeanBottom.getResult() * 1.1;
    if (notEnoughShapes) {
        threshold = threshold / 2.0;
    }
    lowerThreshold = threshold / 2.0;

    LOG.debug("Bottom threshold: " + threshold);
    LOG.debug("Bottom lowerThreshold: " + lowerThreshold);
    // Same re-arming search as for the mean-line, scanning upwards from the
    // bottom towards the mid-line; defaults to the mean-line if nothing is found.
    int baseLine = meanLine;
    boolean findBaseLine = true;
    for (i = pixelCounts.length - 1; i >= yIntervalTop; i--) {
        int pixelCount = pixelCounts[i];
        if (findBaseLine && pixelCount > threshold) {
            baseLine = i;
            findBaseLine = false;
        } else if (!findBaseLine && pixelCount < lowerThreshold) {
            findBaseLine = true;
        }
    }

    // trace the histogram with the two guidelines marked
    for (i = 0; i < yInterval; i++) {
        int pixelCount = pixelCounts[i];
        if (i == meanLine)
            LOG.trace("======= MEAN LINE " + i + " ==========");
        LOG.trace("pixel row " + i + ". pixel count " + pixelCount);
        if (i == baseLine)
            LOG.trace("======= BASE LINE " + i + " ==========");
    }

    // assign base lines and mean lines to each shape
    shapeIndex = 0;
    for (Shape shape : shapes) {
        int shapeMidPointY = rowYMidPoints.get(shapeIndex);
        // convert histogram indexes back to absolute y, then to y relative to the shape's top
        int yMeanline = (shapeMidPointY - yIntervalTop) + meanLine;
        int yBaseline = (shapeMidPointY - yIntervalTop) + baseLine;
        LOG.trace(shape.toString() + ", meanLine: " + (yMeanline - shape.getTop()) + ", baseLine: "
                + (yBaseline - shape.getTop()));
        shape.setBaseLine(yBaseline - shape.getTop());
        shape.setMeanLine(yMeanline - shape.getTop());
        shapeIndex++;
    } // next shape

    int xHeight = baseLine - meanLine;
    return xHeight;
}

From source file:com.joliciel.talismane.stats.FScoreCalculator.java

/**
 * Combine the results of n cross validation results into a single f-score file.
 * <p>
 * Every file in <code>directory</code> whose name starts with <code>prefix</code> and ends
 * with <code>suffix</code> is read as UTF-8 CSV. The first line of each file must contain a
 * cell equal to <code>true+</code>; on the following lines the cells at that position and the
 * five after it are read as true positives, false positives, false negatives, precision,
 * recall and f-score, with the first cell being the outcome name. Reading of a file stops at
 * the row whose first cell is <code>AVERAGE</code>.
 * <p>
 * For each outcome, the combined output contains the summed counts, the scores computed on
 * those summed counts, and the mean and standard deviation of the per-file scores.
 * <p>
 * NOTE(review): only the single character directly after the prefix is parsed as the file
 * index, so files whose index has more than one digit and shares a first digit replace each
 * other in the per-file map - confirm that at most 10 folds are ever combined.
 *
 * @param directory the directory containing the individual cross-validation result files
 * @param prefix the file name prefix identifying the files to combine
 * @param suffix the file name suffix identifying the files to combine
 * @param csvFileWriter the writer which receives the combined CSV; flushed after each outcome
 *        row but not closed
 * @throws JolicielException if no file matches, or a matching file has no <code>true+</code>
 *         header cell
 * @throws RuntimeException wrapping any {@link IOException} met while reading or writing
 */
static void combineCrossValidationResults(File directory, String prefix, String suffix, Writer csvFileWriter) {
    try {
        File[] files = directory.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read;
            // treat it as "nothing found" so the explicit error below is raised instead of an NPE.
            files = new File[0];
        }
        Map<Integer, Map<String, FScoreStats>> fileStatsMap = new HashMap<Integer, Map<String, FScoreStats>>();
        for (File file : files) {
            if (file.getName().startsWith(prefix) && file.getName().endsWith(suffix)) {
                int index = Integer.parseInt(file.getName().substring(prefix.length(), prefix.length() + 1));
                Map<String, FScoreStats> statsMap = new HashMap<String, FScoreCalculator.FScoreStats>();
                fileStatsMap.put(index, statsMap);
                Scanner scanner = new Scanner(
                        new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")));
                try {
                    boolean firstLine = true;
                    int truePositivePos = -1;

                    while (scanner.hasNextLine()) {
                        String line = scanner.nextLine();
                        List<String> cells = CSV.getCSVCells(line);
                        if (firstLine) {
                            // header line: locate the "true+" column, the other columns are relative to it
                            int i = 0;
                            for (String cell : cells) {
                                if (cell.equals("true+")) {
                                    truePositivePos = i;
                                    break;
                                }
                                i++;
                            }
                            if (truePositivePos < 0) {
                                throw new JolicielException("Couldn't find true+ on first line");
                            }
                            firstLine = false;
                        } else {
                            FScoreStats stats = new FScoreStats();
                            String outcome = cells.get(0);
                            stats.outcome = outcome;
                            // the AVERAGE row ends the per-outcome rows, nothing after it is read
                            if (outcome.equals("AVERAGE"))
                                break;
                            stats.truePos = Integer.parseInt(cells.get(truePositivePos));
                            stats.falsePos = Integer.parseInt(cells.get(truePositivePos + 1));
                            stats.falseNeg = Integer.parseInt(cells.get(truePositivePos + 2));
                            stats.precision = Double.parseDouble(cells.get(truePositivePos + 3));
                            stats.recall = Double.parseDouble(cells.get(truePositivePos + 4));
                            stats.fScore = Double.parseDouble(cells.get(truePositivePos + 5));
                            statsMap.put(outcome, stats);
                        } // firstLine?
                    } // has more lines
                } finally {
                    // always release the file handle, even when a row fails to parse
                    scanner.close();
                }
            } // file in current series
        } // next file

        int numFiles = fileStatsMap.size();
        if (numFiles == 0) {
            throw new JolicielException("No files found matching prefix and suffix provided");
        }

        // per-outcome statistics over the individual files, keyed by outcome + measure name
        Map<String, DescriptiveStatistics> descriptiveStatsMap = new HashMap<String, DescriptiveStatistics>();
        // per-outcome counts summed over all files
        Map<String, FScoreStats> outcomeStats = new HashMap<String, FScoreCalculator.FScoreStats>();
        // TreeSet: outcomes are written in sorted order
        Set<String> outcomes = new TreeSet<String>();
        for (Map<String, FScoreStats> statsMap : fileStatsMap.values()) {
            for (FScoreStats stats : statsMap.values()) {
                DescriptiveStatistics fScoreStats = descriptiveStatsMap.get(stats.outcome + "fScore");
                if (fScoreStats == null) {
                    fScoreStats = new DescriptiveStatistics();
                    descriptiveStatsMap.put(stats.outcome + "fScore", fScoreStats);
                }
                fScoreStats.addValue(stats.fScore);
                DescriptiveStatistics precisionStats = descriptiveStatsMap.get(stats.outcome + "precision");
                if (precisionStats == null) {
                    precisionStats = new DescriptiveStatistics();
                    descriptiveStatsMap.put(stats.outcome + "precision", precisionStats);
                }
                precisionStats.addValue(stats.precision);
                DescriptiveStatistics recallStats = descriptiveStatsMap.get(stats.outcome + "recall");
                if (recallStats == null) {
                    recallStats = new DescriptiveStatistics();
                    descriptiveStatsMap.put(stats.outcome + "recall", recallStats);
                }
                recallStats.addValue(stats.recall);

                FScoreStats outcomeStat = outcomeStats.get(stats.outcome);
                if (outcomeStat == null) {
                    outcomeStat = new FScoreStats();
                    outcomeStat.outcome = stats.outcome;
                    outcomeStats.put(stats.outcome, outcomeStat);
                }
                outcomeStat.truePos += stats.truePos;
                outcomeStat.falsePos += stats.falsePos;
                outcomeStat.falseNeg += stats.falseNeg;

                outcomes.add(stats.outcome);
            }
        }

        csvFileWriter.write(CSV.format(prefix + suffix));
        csvFileWriter.write("\n");
        csvFileWriter.write(CSV.format("outcome"));
        csvFileWriter.write(CSV.format("true+") + CSV.format("false+") + CSV.format("false-")
                + CSV.format("tot precision") + CSV.format("avg precision") + CSV.format("dev precision")
                + CSV.format("tot recall") + CSV.format("avg recall") + CSV.format("dev recall")
                + CSV.format("tot f-score") + CSV.format("avg f-score") + CSV.format("dev f-score") + "\n");

        for (String outcome : outcomes) {
            csvFileWriter.write(CSV.format(outcome));
            FScoreStats outcomeStat = outcomeStats.get(outcome);
            DescriptiveStatistics fScoreStats = descriptiveStatsMap.get(outcome + "fScore");
            DescriptiveStatistics precisionStats = descriptiveStatsMap.get(outcome + "precision");
            DescriptiveStatistics recallStats = descriptiveStatsMap.get(outcome + "recall");
            // presumably derives precision/recall/fScore from the summed counts - verify in FScoreStats
            outcomeStat.calculate();
            csvFileWriter.write(CSV.format(outcomeStat.truePos));
            csvFileWriter.write(CSV.format(outcomeStat.falsePos));
            csvFileWriter.write(CSV.format(outcomeStat.falseNeg));
            // NOTE(review): the "tot" values are scaled by 100 while the per-file averages are
            // written as read; this assumes the input files already hold percentages - confirm.
            csvFileWriter.write(CSV.format(outcomeStat.precision * 100));
            csvFileWriter.write(CSV.format(precisionStats.getMean()));
            csvFileWriter.write(CSV.format(precisionStats.getStandardDeviation()));
            csvFileWriter.write(CSV.format(outcomeStat.recall * 100));
            csvFileWriter.write(CSV.format(recallStats.getMean()));
            csvFileWriter.write(CSV.format(recallStats.getStandardDeviation()));
            csvFileWriter.write(CSV.format(outcomeStat.fScore * 100));
            csvFileWriter.write(CSV.format(fScoreStats.getMean()));
            csvFileWriter.write(CSV.format(fScoreStats.getStandardDeviation()));
            csvFileWriter.write("\n");
            csvFileWriter.flush();
        }
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}

From source file:com.joliciel.jochre.graphics.SourceImageImpl.java

/**
 * Finds the vertical white areas which separate columns of text on this image.
 * <p>
 * The result is computed on the first call and stored in <code>columnSeparators</code>;
 * later calls return the stored list. The computation runs in three stages:
 * <ol>
 * <li>Project the slope-adjusted shapes onto the y-axis and use the large empty horizontal
 * ranges near the top and the bottom to delimit the main text area.</li>
 * <li>Project the shapes whose top lies inside the main text area onto the x-axis and collect
 * the near-empty vertical ranges that are at least one mean x-height wide.</li>
 * <li>Narrow each such range to its widest sub-range found at the lowest possible fill
 * threshold, and store it as a white area spanning the main text area.</li>
 * </ol>
 *
 * @return the column separators, one per column break found (possibly empty)
 */
public List<Rectangle> findColumnSeparators() {
    if (columnSeparators == null) {
        LOG.debug("############ findColumnSeparators ##############");
        double slope = this.getMeanHorizontalSlope();

        double imageMidPointX = (double) this.getWidth() / 2.0;

        int[] horizontalCounts = new int[this.getHeight()];
        DescriptiveStatistics rowXHeightStats = new DescriptiveStatistics();
        // first get the fill factor for each horizontal row in the image
        for (RowOfShapes row : this.getRows()) {
            rowXHeightStats.addValue(row.getXHeight());
            for (Shape shape : row.getShapes()) {
                double shapeMidPointX = (double) (shape.getLeft() + shape.getRight()) / 2.0;
                // shift the shape's top along the mean slope, relative to the image's horizontal centre
                int slopeAdjustedTop = (int) Math
                        .round(shape.getTop() + (slope * (shapeMidPointX - imageMidPointX)));
                if (slopeAdjustedTop >= 0 && slopeAdjustedTop < this.getHeight()) {
                    for (int i = 0; i < shape.getHeight(); i++) {
                        if (slopeAdjustedTop + i < horizontalCounts.length)
                            horizontalCounts[slopeAdjustedTop + i] += shape.getWidth();
                    }
                }
            }
        }
        DescriptiveStatistics horizontalStats = new DescriptiveStatistics();
        DescriptiveStatistics horizontalStatsNonEmpty = new DescriptiveStatistics();
        for (int i = 0; i < this.getHeight(); i++) {
            horizontalStats.addValue(horizontalCounts[i]);
            if (horizontalCounts[i] > 0)
                horizontalStatsNonEmpty.addValue(horizontalCounts[i]);
        }
        LOG.debug("Mean horizontal count: " + horizontalStats.getMean());
        LOG.debug("Median horizontal count: " + horizontalStats.getPercentile(50));
        LOG.debug("25 percentile horizontal count: " + horizontalStats.getPercentile(25));
        LOG.debug("Mean horizontal count (non empty): " + horizontalStatsNonEmpty.getMean());
        LOG.debug("Median horizontal count (non empty): " + horizontalStatsNonEmpty.getPercentile(50));
        LOG.debug("25 percentile horizontal count (non empty): " + horizontalStatsNonEmpty.getPercentile(25));
        LOG.debug("10 percentile horizontal count (non empty): " + horizontalStatsNonEmpty.getPercentile(10));

        // a pixel row counts as empty when its fill is at most 1/8 of the mean non-empty fill
        double maxEmptyRowCount = horizontalStatsNonEmpty.getMean() / 8.0;
        LOG.debug("maxEmptyRowCount: " + maxEmptyRowCount);

        // collect the runs of consecutive empty pixel rows as {start, end} pairs
        boolean inEmptyHorizontalRange = false;
        List<int[]> emptyHorizontalRanges = new ArrayList<int[]>();
        int emptyHorizontalRangeStart = 0;
        for (int i = 0; i < this.getHeight(); i++) {
            if (!inEmptyHorizontalRange && horizontalCounts[i] <= maxEmptyRowCount) {
                inEmptyHorizontalRange = true;
                emptyHorizontalRangeStart = i;
            } else if (inEmptyHorizontalRange && horizontalCounts[i] > maxEmptyRowCount) {
                inEmptyHorizontalRange = false;
                emptyHorizontalRanges.add(new int[] { emptyHorizontalRangeStart, i });
            }
        }
        if (inEmptyHorizontalRange) {
            emptyHorizontalRanges.add(new int[] { emptyHorizontalRangeStart, this.getHeight() - 1 });
        }

        LOG.debug("rowXHeight mean: " + rowXHeightStats.getMean());
        LOG.debug("rowXHeight median: " + rowXHeightStats.getPercentile(50));
        double minHorizontalBreak = rowXHeightStats.getMean() * 2.0;
        LOG.debug("minHorizontalBreak: " + minHorizontalBreak);

        // scanning from the top: the main text starts after the last big break seen
        // before two small breaks or two big breaks have been counted
        int smallBreakCount = 0;
        int mainTextTop = 0;
        int bigBreakCount = 0;
        for (int[] emptyHorizontalRange : emptyHorizontalRanges) {
            int height = emptyHorizontalRange[1] - emptyHorizontalRange[0];
            LOG.trace("empty range: " + emptyHorizontalRange[0] + ", " + emptyHorizontalRange[1] + " = "
                    + height);
            if (bigBreakCount < 2 && smallBreakCount < 2 && height > minHorizontalBreak) {
                mainTextTop = emptyHorizontalRange[1];
                bigBreakCount++;
            }
            if (height <= minHorizontalBreak)
                smallBreakCount++;
        }

        LOG.debug("mainTextTop:" + mainTextTop);
        // lift mainTextTop upwards by max half the mean x-height or till we reach a zero row
        int minTop = mainTextTop - (int) (rowXHeightStats.getMean() / 2.0);
        if (minTop < 0)
            minTop = 0;
        for (int i = mainTextTop; i > minTop; i--) {
            mainTextTop = i;
            if (horizontalCounts[i] == 0) {
                break;
            }
        }
        LOG.debug("mainTextTop (adjusted):" + mainTextTop);

        // same scan from the bottom upwards, stopping once more than two breaks have been counted
        smallBreakCount = 0;
        bigBreakCount = 0;
        int mainTextBottom = this.getHeight();
        for (int i = emptyHorizontalRanges.size() - 1; i >= 0; i--) {
            int[] emptyHorizontalRange = emptyHorizontalRanges.get(i);
            int height = emptyHorizontalRange[1] - emptyHorizontalRange[0];
            LOG.trace("emptyHorizontalRange: " + emptyHorizontalRange[0] + ", height: " + height
                    + ", bigBreakCount: " + bigBreakCount + ", smallBreakCount: " + smallBreakCount);
            if ((bigBreakCount + smallBreakCount) <= 2 && height > minHorizontalBreak) {
                mainTextBottom = emptyHorizontalRange[0];
                LOG.trace("Set mainTextBottom to " + mainTextBottom);
                bigBreakCount++;
            }
            if (height <= minHorizontalBreak)
                smallBreakCount++;
            if ((bigBreakCount + smallBreakCount) > 2)
                break;
        }
        LOG.debug("mainTextBottom:" + mainTextBottom);
        // lower mainTextBottom downwards by max half the mean x-height or till we reach a zero row
        int maxBottom = mainTextBottom + (int) (rowXHeightStats.getMean() / 2.0);
        if (maxBottom > this.getHeight())
            maxBottom = this.getHeight();
        for (int i = mainTextBottom; i < maxBottom; i++) {
            mainTextBottom = i;
            if (horizontalCounts[i] == 0) {
                break;
            }
        }
        LOG.debug("mainTextBottom (adjusted):" + mainTextBottom);

        int[] verticalCounts = new int[this.getWidth()];
        // now get the fill factor for each vertical column of pixels, using only the shapes
        // whose slope-adjusted top lies inside the main text area
        for (RowOfShapes row : this.getRows()) {
            for (Shape shape : row.getShapes()) {
                int slopeAdjustedLeft = (int) Math.round(shape.getLeft() - row.getXAdjustment());
                double shapeMidPointX = (double) (shape.getLeft() + shape.getRight()) / 2.0;
                int slopeAdjustedTop = (int) Math
                        .round(shape.getTop() + (slope * (shapeMidPointX - imageMidPointX)));
                if (slopeAdjustedTop >= mainTextTop && slopeAdjustedTop <= mainTextBottom
                        && slopeAdjustedLeft >= 0 && slopeAdjustedLeft < this.getWidth()) {
                    for (int i = 0; i < shape.getWidth(); i++) {
                        if (slopeAdjustedLeft + i < this.getWidth())
                            verticalCounts[slopeAdjustedLeft + i] += shape.getHeight();
                    }
                }
            }
        }

        DescriptiveStatistics verticalStats = new DescriptiveStatistics();
        DescriptiveStatistics verticalStatsNonEmpty = new DescriptiveStatistics();
        for (int i = 0; i < this.getWidth(); i++) {
            verticalStats.addValue(verticalCounts[i]);
            if (verticalCounts[i] > 0)
                verticalStatsNonEmpty.addValue(verticalCounts[i]);
        }
        LOG.debug("Mean vertical count: " + verticalStats.getMean());
        LOG.debug("Median vertical count: " + verticalStats.getPercentile(50));
        LOG.debug("25 percentile vertical count: " + verticalStats.getPercentile(25));
        LOG.debug("Mean vertical count (non empty): " + verticalStatsNonEmpty.getMean());
        LOG.debug("Median vertical count (non empty): " + verticalStatsNonEmpty.getPercentile(50));
        LOG.debug("25 percentile vertical count (non empty): " + verticalStatsNonEmpty.getPercentile(25));
        LOG.debug("10 percentile vertical count (non empty): " + verticalStatsNonEmpty.getPercentile(10));
        LOG.debug("1 percentile vertical count (non empty): " + verticalStatsNonEmpty.getPercentile(1));

        // a pixel column counts as empty when its fill is at most the 1st percentile of the
        // non-empty columns (the horizontal pass uses mean / 8 instead)
        double maxEmptyColumnCount = verticalStatsNonEmpty.getPercentile(1);
        LOG.debug("maxEmptyColumnCount: " + maxEmptyColumnCount);

        // collect the runs of consecutive empty pixel columns as {start, end} pairs
        boolean inEmptyVerticalRange = false;
        List<int[]> emptyVerticalRanges = new ArrayList<int[]>();
        int emptyVerticalRangeStart = 0;
        for (int i = 0; i < this.getWidth(); i++) {
            if (!inEmptyVerticalRange && verticalCounts[i] <= maxEmptyColumnCount) {
                inEmptyVerticalRange = true;
                emptyVerticalRangeStart = i;
            } else if (inEmptyVerticalRange && verticalCounts[i] > maxEmptyColumnCount) {
                inEmptyVerticalRange = false;
                emptyVerticalRanges.add(new int[] { emptyVerticalRangeStart, i });
            }
        }
        if (inEmptyVerticalRange) {
            emptyVerticalRanges.add(new int[] { emptyVerticalRangeStart, this.getWidth() - 1 });
        }

        LOG.debug("rowXHeight mean: " + rowXHeightStats.getMean());
        LOG.debug("rowXHeight median: " + rowXHeightStats.getPercentile(50));
        double minVerticalBreak = rowXHeightStats.getMean() * 1.0;
        LOG.debug("minVerticalBreak: " + minVerticalBreak);

        // only empty ranges at least one mean x-height wide qualify as column breaks
        List<int[]> columnBreaks = new ArrayList<int[]>();
        for (int[] emptyVerticalRange : emptyVerticalRanges) {
            int width = emptyVerticalRange[1] - emptyVerticalRange[0];
            LOG.trace("empty range: " + emptyVerticalRange[0] + ", " + emptyVerticalRange[1] + " = " + width);

            if (width >= minVerticalBreak) {
                columnBreaks.add(emptyVerticalRange);
                LOG.trace("Found column break!");
            }
        }

        // The detection threshold is captured once, outside the loop. Each column break then
        // steps its own local threshold from 0 up to this value, so that the search bounds of
        // one break are not affected by where the search for the previous break stopped.
        final double originalCount = maxEmptyColumnCount;

        columnSeparators = new ArrayList<Rectangle>();
        for (int[] columnBreak : columnBreaks) {
            // reduce the column break to the thickest empty area if possible:
            // raise the threshold in steps of originalCount / 8 until some sub-range qualifies
            int[] bestColumnBreak = null;
            double threshold = 0;
            while (bestColumnBreak == null && threshold <= originalCount) {
                boolean inCandidateRange = false;
                List<int[]> candidateRanges = new ArrayList<int[]>();
                int candidateRangeStart = columnBreak[0];
                for (int i = columnBreak[0]; i <= columnBreak[1]; i++) {
                    if (!inCandidateRange && verticalCounts[i] <= threshold) {
                        inCandidateRange = true;
                        candidateRangeStart = i;
                    } else if (inCandidateRange && verticalCounts[i] > threshold) {
                        inCandidateRange = false;
                        candidateRanges.add(new int[] { candidateRangeStart, i });
                    }
                }
                if (inCandidateRange) {
                    candidateRanges.add(new int[] { candidateRangeStart, columnBreak[1] });
                }

                // keep the widest candidate; on ties the first one found wins
                for (int[] candidateRange : candidateRanges) {
                    if (bestColumnBreak == null || (candidateRange[1]
                            - candidateRange[0] > bestColumnBreak[1] - bestColumnBreak[0]))
                        bestColumnBreak = candidateRange;
                }
                threshold += (originalCount / 8.0);
            }

            // nothing narrower found: fall back to the full column break
            if (bestColumnBreak == null)
                bestColumnBreak = columnBreak;

            // presumably (left, top, right, bottom) - confirm against the WhiteArea constructor
            Rectangle whiteArea = new WhiteArea(bestColumnBreak[0], mainTextTop, bestColumnBreak[1],
                    mainTextBottom);
            columnSeparators.add(whiteArea);
            LOG.debug("ColumnBreak: " + whiteArea);
        } // next column break
    }
    return columnSeparators;
}