Example usage for org.apache.commons.math.stat.descriptive.moment StandardDeviation StandardDeviation

Introduction

On this page you can find example usage for org.apache.commons.math.stat.descriptive.moment StandardDeviation StandardDeviation.

Prototype

public StandardDeviation() 

Source Link

Document

Constructs a StandardDeviation. The resulting instance is bias-corrected, i.e. it computes the sample standard deviation (denominator n - 1).
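
Before the full examples below, here is a minimal, self-contained sketch of the no-argument constructor in both of the styles those examples use: one-shot evaluation over a double[] via evaluate, and storeless accumulation via increment/getResult. The class name StandardDeviationSketch and the sample values are illustrative only and do not come from any of the source files.

import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;

public class StandardDeviationSketch {
    public static void main(String[] args) {
        double[] values = { 2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0 };

        // One-shot: compute the (bias-corrected) sample standard deviation of an array.
        StandardDeviation std = new StandardDeviation();
        System.out.println("evaluate: " + std.evaluate(values));

        // Storeless: feed values one at a time, then read the running result.
        StandardDeviation incremental = new StandardDeviation();
        for (double v : values) {
            incremental.increment(v);
        }
        System.out.println("getResult: " + incremental.getResult());
    }
}

Both calls should give the same value for the same data; the storeless form is the one used for streaming inputs in several of the examples below.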

Usage

From source file:de.tudarmstadt.ukp.dkpro.tc.mallet.report.MalletBatchCrossValidationReport.java

@Override
public void execute() throws Exception {
    StorageService store = getContext().getStorageService();

    FlexTable<String> table = FlexTable.forClass(String.class);

    Map<String, List<Double>> key2resultValues = new HashMap<String, List<Double>>();

    for (TaskContextMetadata subcontext : getSubtasks()) {
        String name = BatchTask.class.getSimpleName() + "CrossValidation";
        // one CV batch (which internally ran numFolds times)
        if (subcontext.getLabel().startsWith(name)) {
            Map<String, String> discriminatorsMap = store
                    .retrieveBinary(subcontext.getId(), Task.DISCRIMINATORS_KEY, new PropertiesAdapter())
                    .getMap();

            File eval = store.getStorageFolder(subcontext.getId(), EVAL_FILE_NAME + SUFFIX_CSV);

            Map<String, String> resultMap = new HashMap<String, String>();

            String[][] evalMatrix = null;

            int i = 0;
            for (String line : FileUtils.readLines(eval)) {
                String[] tokenizedLine = StrTokenizer.getCSVInstance(line).getTokenArray();
                if (evalMatrix == null) {
                    evalMatrix = new String[FileUtils.readLines(eval).size()][tokenizedLine.length];
                }
                evalMatrix[i] = tokenizedLine;
                i++;
            }

            // columns
            for (int j = 0; j < evalMatrix[0].length; j++) {
                String header = evalMatrix[0][j];
                String[] vals = new String[evalMatrix.length - 1];
                // rows
                for (int k = 1; k < evalMatrix.length; k++) {
                    if (evalMatrix[k][j].equals("null")) {
                        vals[k - 1] = String.valueOf(0.);
                    } else {
                        vals[k - 1] = evalMatrix[k][j];
                    }
                }
                Mean mean = new Mean();
                Sum sum = new Sum();
                StandardDeviation std = new StandardDeviation();

                double[] dVals = new double[vals.length];
                Set<String> sVals = new HashSet<String>();
                for (int k = 0; k < vals.length; k++) {
                    try {
                        dVals[k] = Double.parseDouble(vals[k]);
                        sVals = null;
                    } catch (NumberFormatException e) {
                        dVals = null;
                        sVals.add(vals[k]);
                    }
                }

                if (dVals != null) {
                    if (nonAveragedResultsMeasures.contains(header)) {
                        resultMap.put(header, String.valueOf(sum.evaluate(dVals)));
                    } else {
                        resultMap.put(header, String.valueOf(mean.evaluate(dVals)) + "\u00B1"
                                + String.valueOf(std.evaluate(dVals)));
                    }
                } else {
                    if (sVals.size() > 1) {
                        resultMap.put(header, "---");
                    } else {
                        resultMap.put(header, vals[0]);
                    }
                }
            }

            String key = getKey(discriminatorsMap);

            List<Double> results;
            if (key2resultValues.get(key) == null) {
                results = new ArrayList<Double>();
            } else {
                results = key2resultValues.get(key);

            }
            key2resultValues.put(key, results);

            Map<String, String> values = new HashMap<String, String>();
            Map<String, String> cleanedDiscriminatorsMap = new HashMap<String, String>();

            for (String disc : discriminatorsMap.keySet()) {
                if (!ReportUtils.containsExcludePattern(disc, discriminatorsToExclude)) {
                    cleanedDiscriminatorsMap.put(disc, discriminatorsMap.get(disc));
                }
            }
            values.putAll(cleanedDiscriminatorsMap);
            values.putAll(resultMap);

            table.addRow(subcontext.getLabel(), values);
        }
    }

    getContext().getLoggingService().message(getContextLabel(), ReportUtils.getPerformanceOverview(table));

    // Excel cannot cope with more than 255 columns
    if (table.getColumnIds().length <= 255) {
        getContext().storeBinary(EVAL_FILE_NAME + "_compact" + SUFFIX_EXCEL, table.getExcelWriter());
    }
    getContext().storeBinary(EVAL_FILE_NAME + "_compact" + SUFFIX_CSV, table.getCsvWriter());

    table.setCompact(false);
    // Excel cannot cope with more than 255 columns
    if (table.getColumnIds().length <= 255) {
        getContext().storeBinary(EVAL_FILE_NAME + SUFFIX_EXCEL, table.getExcelWriter());
    }
    getContext().storeBinary(EVAL_FILE_NAME + SUFFIX_CSV, table.getCsvWriter());

    // output the location of the batch evaluation folder
    // otherwise it might be hard for novice users to locate this
    File dummyFolder = store.getStorageFolder(getContext().getId(), "dummy");
    // TODO can we also do this without creating and deleting the dummy folder?
    getContext().getLoggingService().message(getContextLabel(),
            "Storing detailed results in:\n" + dummyFolder.getParent() + "\n");
    dummyFolder.delete();
}

From source file:de.tudarmstadt.ukp.dkpro.tc.crfsuite.CRFSuiteBatchCrossValidationReport.java

@Override
public void execute() throws Exception {
    StorageService store = getContext().getStorageService();

    FlexTable<String> table = FlexTable.forClass(String.class);

    Map<String, List<Double>> key2resultValues = new HashMap<String, List<Double>>();

    for (TaskContextMetadata subcontext : getSubtasks()) {
        String name = ExperimentCrossValidation.class.getSimpleName();
        // one CV batch (which internally ran numFolds times)
        if (subcontext.getLabel().startsWith(name)) {
            Map<String, String> discriminatorsMap = store
                    .retrieveBinary(subcontext.getId(), Task.DISCRIMINATORS_KEY, new PropertiesAdapter())
                    .getMap();

            File eval = store.getStorageFolder(subcontext.getId(), EVAL_FILE_NAME + SUFFIX_CSV);

            Map<String, String> resultMap = new HashMap<String, String>();

            String[][] evalMatrix = null;

            int i = 0;
            for (String line : FileUtils.readLines(eval)) {
                String[] tokenizedLine = StrTokenizer.getCSVInstance(line).getTokenArray();
                if (evalMatrix == null) {
                    evalMatrix = new String[FileUtils.readLines(eval).size()][tokenizedLine.length];
                }
                evalMatrix[i] = tokenizedLine;
                i++;
            }

            // columns
            for (int j = 0; j < evalMatrix[0].length; j++) {
                String header = evalMatrix[0][j];
                String[] vals = new String[evalMatrix.length - 1];
                // rows
                for (int k = 1; k < evalMatrix.length; k++) {
                    if (evalMatrix[k][j].equals("null")) {
                        vals[k - 1] = String.valueOf(0.);
                    } else {
                        vals[k - 1] = evalMatrix[k][j];
                    }

                }
                Mean mean = new Mean();
                Sum sum = new Sum();
                StandardDeviation std = new StandardDeviation();

                double[] dVals = new double[vals.length];
                Set<String> sVals = new HashSet<String>();
                for (int k = 0; k < vals.length; k++) {
                    try {
                        dVals[k] = Double.parseDouble(vals[k]);
                        sVals = null;
                    } catch (NumberFormatException e) {
                        dVals = null;
                        sVals.add(vals[k]);
                    }
                }

                if (dVals != null) {
                    if (nonAveragedResultsMeasures.contains(header)) {
                        resultMap.put(header + foldSum, String.valueOf(sum.evaluate(dVals)));
                    } else {
                        resultMap.put(header + foldAveraged, String.valueOf(
                                mean.evaluate(dVals) + "\u00B1" + String.valueOf(std.evaluate(dVals))));
                    }
                } else {
                    if (sVals.size() > 1) {
                        resultMap.put(header, "---");
                    } else {
                        resultMap.put(header, vals[0]);
                    }
                }
            }

            String key = getKey(discriminatorsMap);

            List<Double> results;
            if (key2resultValues.get(key) == null) {
                results = new ArrayList<Double>();
            } else {
                results = key2resultValues.get(key);

            }
            key2resultValues.put(key, results);

            Map<String, String> values = new HashMap<String, String>();
            Map<String, String> cleanedDiscriminatorsMap = new HashMap<String, String>();

            for (String disc : discriminatorsMap.keySet()) {
                if (!ReportUtils.containsExcludePattern(disc, discriminatorsToExclude)) {
                    cleanedDiscriminatorsMap.put(disc, discriminatorsMap.get(disc));
                }
            }
            values.putAll(cleanedDiscriminatorsMap);
            values.putAll(resultMap);

            table.addRow(subcontext.getLabel(), values);
        }
    }

    getContext().getLoggingService().message(getContextLabel(), ReportUtils.getPerformanceOverview(table));
    // Excel cannot cope with more than 255 columns
    if (table.getColumnIds().length <= 255) {
        getContext().storeBinary(EVAL_FILE_NAME + "_compact" + SUFFIX_EXCEL, table.getExcelWriter());
    }
    getContext().storeBinary(EVAL_FILE_NAME + "_compact" + SUFFIX_CSV, table.getCsvWriter());

    table.setCompact(false);
    // Excel cannot cope with more than 255 columns
    if (table.getColumnIds().length <= 255) {
        getContext().storeBinary(EVAL_FILE_NAME + SUFFIX_EXCEL, table.getExcelWriter());
    }
    getContext().storeBinary(EVAL_FILE_NAME + SUFFIX_CSV, table.getCsvWriter());

    // output the location of the batch evaluation folder
    // otherwise it might be hard for novice users to locate this
    File dummyFolder = store.getStorageFolder(getContext().getId(), "dummy");
    // TODO can we also do this without creating and deleting the dummy folder?
    getContext().getLoggingService().message(getContextLabel(),
            "Storing detailed results in:\n" + dummyFolder.getParent() + "\n");
    dummyFolder.delete();
}

From source file:cerrla.Performance.java

/**
 * Records performance scores using sliding windows of results.
 *
 * @param currentEpisode
 *            The current episode.
 */
public void recordPerformanceScore(int currentEpisode) {
    if (recentScores_.isEmpty())
        return;
    // Transform the queues into arrays
    double[] vals = new double[recentScores_.size()];
    int i = 0;
    for (Double val : recentScores_)
        vals[i++] = val.doubleValue();
    double[] envSDs = new double[internalSDs_.size()];
    i = 0;
    for (Double envSD : internalSDs_)
        envSDs[i++] = envSD.doubleValue();

    Mean m = new Mean();
    StandardDeviation sd = new StandardDeviation();
    double mean = m.evaluate(vals);
    double meanDeviation = sd.evaluate(envSDs) * CONVERGENCE_PERCENT_BUFFER;

    Double[] details = new Double[PerformanceDetails.values().length];
    details[PerformanceDetails.EPISODE.ordinal()] = Double.valueOf(currentEpisode);
    details[PerformanceDetails.MEAN.ordinal()] = mean;
    details[PerformanceDetails.SD.ordinal()] = sd.evaluate(vals);
    performanceDetails_.put(currentEpisode, details);

    // Output current means
    if (ProgramArgument.SYSTEM_OUTPUT.booleanValue() && !frozen_) {
        DecimalFormat formatter = new DecimalFormat("#0.00");
        String meanString = formatter.format(mean);
        String sdString = formatter.format(meanDeviation);
        System.out.println("Average performance: " + meanString + " " + SD_SYMBOL + " " + sdString);
    }
    if (frozen_) {
        System.out.println(currentEpisode + ": " + details[PerformanceDetails.MEAN.ordinal()]);
    }
}

From source file:jsprit.core.algorithm.acceptor.ExperimentalSchrimpfAcceptance.java

@Override
public void informAlgorithmStarts(VehicleRoutingProblem problem, VehicleRoutingAlgorithm algorithm,
        Collection<VehicleRoutingProblemSolution> solutions) {
    reset();
    logger.info("---------------------------------------------------------------------");
    logger.info("prepare schrimpfAcceptanceFunction, i.e. determine initial threshold");
    logger.info("start random-walk (see randomWalk.xml)");
    double now = System.currentTimeMillis();
    this.nOfTotalIterations = algorithm.getMaxIterations();

    /*
     * randomWalk to determine standardDev
     */
    final double[] results = new double[nOfRandomWalks];

    URL resource = Resource.getAsURL("randomWalk.xml");
    AlgorithmConfig algorithmConfig = new AlgorithmConfig();
    new AlgorithmConfigXmlReader(algorithmConfig).read(resource);
    VehicleRoutingAlgorithm vra = VehicleRoutingAlgorithms.createAlgorithm(problem, algorithmConfig);
    vra.setMaxIterations(nOfRandomWalks);
    vra.getAlgorithmListeners().addListener(new IterationEndsListener() {

        @Override
        public void informIterationEnds(int iteration, VehicleRoutingProblem problem,
                Collection<VehicleRoutingProblemSolution> solutions) {
            double result = Solutions.bestOf(solutions).getCost();
            //            logger.info("result={}", result);
            results[iteration - 1] = result;
        }

    });
    vra.searchSolutions();

    StandardDeviation dev = new StandardDeviation();
    double standardDeviation = dev.evaluate(results);
    initialThreshold = standardDeviation / 2;

    logger.info("warmup done");
    logger.info("total time: {}s", ((System.currentTimeMillis() - now) / 1000.0));
    logger.info("initial threshold: {}", initialThreshold);
    logger.info("---------------------------------------------------------------------");

}

From source file:net.sf.katta.tool.loadtest.LoadTestMasterOperation.java

@Override
public void nodeOperationsComplete(MasterContext context, List<OperationResult> nodeResults) throws Exception {
    try {
        final int queryRate = calculateCurrentQueryRate();
        LOG.info("collecting results for iteration " + _currentIteration + " and query rate " + queryRate
                + " after " + (System.currentTimeMillis() - _currentIterationStartTime) + " ms ...");
        List<LoadTestQueryResult> queryResults = new ArrayList<LoadTestQueryResult>();
        for (OperationResult operationResult : nodeResults) {
            if (operationResult == null || operationResult.getUnhandledException() != null) {
                Exception rootException = null;
                if (operationResult != null) {
                    rootException = operationResult.getUnhandledException();
                }
                throw new IllegalStateException(
                        "at least one node operation did not completed properly: " + nodeResults,
                        rootException);
            }
            LoadTestNodeOperationResult nodeOperationResult = (LoadTestNodeOperationResult) operationResult;
            queryResults.addAll(nodeOperationResult.getQueryResults());
        }
        LOG.info("Received " + queryResults.size() + " queries, expected " + queryRate * _runTime / 1000);

        File statisticsFile = new File(_resultDir, "load-test-log-" + _startTime + ".log");
        File resultsFile = new File(_resultDir, "load-test-results-" + _startTime + ".log");
        Writer statisticsWriter = new OutputStreamWriter(new FileOutputStream(statisticsFile, true));
        Writer resultWriter = new OutputStreamWriter(new FileOutputStream(resultsFile, true));
        if (_currentIteration == 0) {
            // print headers
            statisticsWriter.append("#queryRate \tnode \tstartTime \tendTime \telapseTime \tquery \n");
            resultWriter.append(
                    "#requestedQueryRate \tachievedQueryRate \tfiredQueries \tqueryErrors \tavarageQueryDuration \tstandardDeviation  \n");
        }
        try {
            StorelessUnivariateStatistic timeStandardDeviation = new StandardDeviation();
            StorelessUnivariateStatistic timeMean = new Mean();
            int errors = 0;

            for (LoadTestQueryResult result : queryResults) {
                long elapsedTime = result.getEndTime() > 0 ? result.getEndTime() - result.getStartTime() : -1;
                statisticsWriter.write(queryRate + "\t" + result.getNodeId() + "\t" + result.getStartTime()
                        + "\t" + result.getEndTime() + "\t" + elapsedTime + "\t" + result.getQuery() + "\n");
                if (elapsedTime != -1) {
                    timeStandardDeviation.increment(elapsedTime);
                    timeMean.increment(elapsedTime);
                } else {
                    ++errors;
                }
            }
            resultWriter.write(queryRate + "\t" + ((double) queryResults.size() / (_runTime / 1000)) + "\t"
                    + queryResults.size() + "\t" + errors + "\t" + (int) timeMean.getResult() + "\t"
                    + (int) timeStandardDeviation.getResult() + "\n");
        } catch (IOException e) {
            throw new IllegalStateException("Failed to write statistics data.", e);
        }
        try {
            LOG.info("results written to " + resultsFile.getAbsolutePath());
            LOG.info("statistics written to " + statisticsFile.getAbsolutePath());
            statisticsWriter.close();
            resultWriter.close();
        } catch (IOException e) {
            LOG.warn("Failed to close statistics file.");
        }
        if (queryRate + _step <= _endRate) {
            _currentIteration++;
            LOG.info("triggering next iteration " + _currentIteration);
            context.getMasterQueue().add(this);
        } else {
            LOG.info("finish load test in iteration " + _currentIteration + " after "
                    + (System.currentTimeMillis() - _startTime) + " ms");
            context.getProtocol().removeFlag(getName());
        }
    } catch (Exception e) {
        context.getProtocol().removeFlag(getName());
    }
}

From source file:jCloisterZone.CarcassonneEnvironment.java

public static void main(String[] args) {
    int repetitions = 100;
    double[] scores = new double[repetitions];

    RRLJCloisterClient client = new LocalCarcassonneClient("config.ini");
    ServerIF server = null;
    Game game = client.getGame();
    Player firstPlayer = null;
    ArrayList<PlayerSlot> slots = new ArrayList<PlayerSlot>();
    for (int r = 0; r < repetitions; r++) {
        client.createGame();
        if (game == null) {
            server = new LocalCarcassonneServer(client.getGame());
            PlayerSlot slot = new PlayerSlot(0, PlayerSlot.SlotType.AI, "RANDOM" + 0, client.getClientId());
            slot.setAiClassName(RandomAIPlayer.class.getName());
            slots.add(slot);
            for (int j = 1; j < Integer.parseInt(args[0]); j++) {
                slot = new PlayerSlot(j, PlayerSlot.SlotType.AI, "AI" + j, client.getClientId());
                slot.setAiClassName(LegacyAiPlayer.class.getName());
                slots.add(slot);
            }
            game = client.getGame();
        } else {
            // Reset the UIs
            server.stopGame();
            game.clearUserInterface();

            // Clear the slots and re-add them.
            for (int i = 0; i < PlayerSlot.COUNT; i++) {
                server.updateSlot(new PlayerSlot(i), null);
            }
        }

        Collections.shuffle(slots);
        for (int i = 0; i < slots.size(); i++) {
            PlayerSlot slot = slots.get(i);
            PlayerSlot cloneSlot = new PlayerSlot(i, slot.getType(), slot.getNick(), slot.getOwner());
            cloneSlot.setAiClassName(slot.getAiClassName());
            server.updateSlot(cloneSlot, LegacyAiPlayer.supportedExpansions());
        }

        server.startGame();

        Phase phase = game.getPhase();

        // Cycle through (probably only once) to keep the game moving.
        while (phase != null && !phase.isEntered()) {
            // Modifying phases to proxyless versions
            if (phase.getClass().equals(CreateGamePhase.class))
                phase = game.getPhases().get(ProxylessCreateGamePhase.class);
            if (phase.getClass().equals(DrawPhase.class))
                phase = game.getPhases().get(ProxylessDrawPhase.class);

            phase.setEntered(true);
            phase.enter();
            phase = game.getPhase();

            if (game.getTurnPlayer().getNick().equals("RANDOM0"))
                firstPlayer = game.getTurnPlayer();
        }
        int score = firstPlayer.getPoints();
        scores[r] = score;
        System.out.println(score);
    }

    Mean m = new Mean();
    StandardDeviation sd = new StandardDeviation();
    System.out.println("Mean: " + m.evaluate(scores) + ", SD: " + sd.evaluate(scores));
}

From source file:com.joliciel.jochre.graphics.SourceImageImpl.java

@Override
public Set<Set<RowOfShapes>> getRowClusters() {
    if (rowClusters == null) {
        Mean heightMean = new Mean();
        StandardDeviation heightStdDev = new StandardDeviation();
        List<double[]> rowHeights = new ArrayList<double[]>(this.getRows().size());
        for (RowOfShapes row : this.getRows()) {
            Shape shape = row.getShapes().iterator().next();
            int height = shape.getBaseLine() - shape.getMeanLine();
            rowHeights.add(new double[] { height });
            heightMean.increment(height);
            heightStdDev.increment(height);
        }

        double stdDevHeight = heightStdDev.getResult();
        List<RowOfShapes> rows = new ArrayList<RowOfShapes>(this.getRows());
        DBSCANClusterer<RowOfShapes> clusterer = new DBSCANClusterer<RowOfShapes>(rows, rowHeights);
        rowClusters = clusterer.cluster(stdDevHeight, 2, true);
        LOG.debug("Found " + rowClusters.size() + " row clusters.");
    }
    return rowClusters;
}

From source file:com.joliciel.jochre.graphics.SegmenterImpl.java

/**
 * Split rows if they're particularly high, and contain considerable white space in the middle.
 * Shapes causing the join will be removed if too high, or attached to the closest row otherwise.
 * @param sourceImage
 * @param regressions
 * @return
 */
void splitRows(SourceImage sourceImage) {
    LOG.debug("########## splitRows #########");

    // Calculate the min row height to be considered for splitting
    double minHeightForSplit = sourceImage.getAverageShapeHeight();
    LOG.debug("minHeightForSplit: " + minHeightForSplit);

    double slopeMean = sourceImage.getMeanHorizontalSlope();

    List<RowOfShapes> candidateRows = new ArrayList<RowOfShapes>();
    for (RowOfShapes row : sourceImage.getRows()) {
        if (row.getRight() == row.getLeft())
            continue;
        int height = row.getBottom() - row.getTop();
        if (height >= minHeightForSplit) {
            LOG.debug("Adding candidate " + row.toString());
            candidateRows.add(row);
        }
    }

    // For each row to be considered for splitting, see if there are lines of white space inside it.
    Hashtable<RowOfShapes, List<RowOfShapes>> splitRows = new Hashtable<RowOfShapes, List<RowOfShapes>>();
    for (RowOfShapes row : candidateRows) {
        SimpleRegression regression = new SimpleRegression();
        // y = intercept + slope * x 
        LOG.debug("Left point: (" + row.getLeft() + " , " + row.getTop() + ")");
        regression.addData(row.getLeft(), row.getTop());
        double rightHandY = row.getTop() + ((double) (row.getRight() - row.getLeft()) * slopeMean);
        LOG.debug("Right point: (" + row.getRight() + " , " + rightHandY + ")");
        regression.addData(row.getRight(), rightHandY);

        int yDelta = (int) Math.ceil(Math.abs(rightHandY - (double) row.getTop()));
        int yInterval = yDelta + (row.getBottom() - row.getTop() + 1) + yDelta;

        LOG.debug("yDelta: " + yDelta);
        LOG.debug("yInterval: " + yInterval);
        // let's get pixel counts shape by shape, and leave out the rest (in case rows overlap vertically)
        int[] pixelCounts = new int[yInterval];
        for (Shape shape : row.getShapes()) {
            LOG.trace("Shape " + shape);
            int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft()));
            LOG.trace("yDeltaAtLeft: " + yDeltaAtLeft);
            // the shape offset + the offset between the regression line and the row top
            // + the delta we left at the start in case the line slopes upwards to the right
            int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta;
            LOG.trace("topIndex: (" + shape.getTop() + " - " + row.getTop() + ") + (" + row.getTop() + " - "
                    + yDeltaAtLeft + ") + " + yDelta + " = " + topIndex);
            for (int x = 0; x < shape.getWidth(); x++) {
                for (int y = 0; y < shape.getHeight(); y++) {
                    if (shape.isPixelBlack(x, y, sourceImage.getBlackThreshold())) {
                        pixelCounts[topIndex + y]++;
                    }
                }
            }
        }

        Mean pixelCountMean = new Mean();
        StandardDeviation pixelCountStdDev = new StandardDeviation();
        for (int i = 0; i < yInterval; i++) {
            LOG.debug("Pixel count " + i + ": " + pixelCounts[i]);
            pixelCountMean.increment(pixelCounts[i]);
            pixelCountStdDev.increment(pixelCounts[i]);
        }
        LOG.debug("pixel count mean: " + pixelCountMean.getResult() + ", std dev: "
                + pixelCountStdDev.getResult());

        // If there's a split required, we're going to go considerably above and below the mean several times
        double lowThreshold = pixelCountMean.getResult() / 2.0;
        double highThreshold = pixelCountMean.getResult() * 2.0;
        boolean inRow = false;
        List<Integer> switches = new ArrayList<Integer>();
        for (int i = 0; i < yInterval; i++) {
            if (!inRow && pixelCounts[i] > highThreshold) {
                LOG.debug("In row at " + i + ", pixel count " + pixelCounts[i]);
                inRow = true;
                switches.add(i);
            } else if (inRow && pixelCounts[i] < lowThreshold) {
                LOG.debug("Out of row at " + i + ", pixel count " + pixelCounts[i]);
                inRow = false;
                switches.add(i);
            }
        }
        if (switches.size() > 2) {
            // we have more than one row
            List<Integer> rowSeparations = new ArrayList<Integer>();

            // find the row separators
            for (int switchIndex = 1; switchIndex < switches.size() - 2; switchIndex = switchIndex + 2) {
                int outOfRow = switches.get(switchIndex);
                int intoRow = switches.get(switchIndex + 1);
                int minPixelCount = (int) Math.ceil(highThreshold);
                int minIndex = -1;
                // find the row with the lowest pixel count
                for (int i = outOfRow; i <= intoRow; i++) {
                    if (pixelCounts[i] < minPixelCount) {
                        minPixelCount = pixelCounts[i];
                        minIndex = i;
                    }
                }
                rowSeparations.add(minIndex);
            }

            // separate the shapes among the rows
            List<RowOfShapes> newRows = new ArrayList<RowOfShapes>(rowSeparations.size() + 1);
            for (int i = 0; i <= rowSeparations.size(); i++) {
                newRows.add(graphicsService.getEmptyRow(sourceImage));
            }

            // add a separator at the beginning and end
            rowSeparations.add(0, 0);
            rowSeparations.add(yInterval + 1);
            for (Shape shape : row.getShapes()) {
                int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft()));
                int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta;
                int firstSepAfterShapeBottom = rowSeparations.size();
                int lastSepBeforeShapeTop = -1;

                for (int i = rowSeparations.size() - 1; i >= 0; i--) {
                    int rowSeparation = rowSeparations.get(i);
                    if (rowSeparation <= topIndex) {
                        lastSepBeforeShapeTop = i;
                        break;
                    }
                }

                for (int i = 0; i < rowSeparations.size(); i++) {
                    int rowSeparation = rowSeparations.get(i);
                    if (rowSeparation >= topIndex + shape.getHeight()) {
                        firstSepAfterShapeBottom = i;
                        break;
                    }
                }

                if (lastSepBeforeShapeTop == firstSepAfterShapeBottom - 1) {
                    // shape clearly belongs to one row
                    RowOfShapes newRow = newRows.get(lastSepBeforeShapeTop);
                    newRow.addShape(shape);
                } else {
                    // is the shape much closer to one row than another?
                    // if yes, add it to this row
                    int[] yPixelsPerRow = new int[newRows.size()];
                    for (int i = 0; i < newRows.size(); i++) {
                        int separatorTop = rowSeparations.get(i);
                        int separatorBottom = rowSeparations.get(i + 1);
                        int top = topIndex < separatorTop ? separatorTop : topIndex;
                        int bottom = topIndex + shape.getHeight() < separatorBottom
                                ? topIndex + shape.getHeight()
                                : separatorBottom;
                        yPixelsPerRow[i] = bottom - top;
                    }

                    int pixelsInMaxRow = 0;
                    int maxPixelRowIndex = -1;
                    for (int i = 0; i < newRows.size(); i++) {
                        if (yPixelsPerRow[i] > pixelsInMaxRow) {
                            pixelsInMaxRow = yPixelsPerRow[i];
                            maxPixelRowIndex = i;
                        }
                    }
                    double minPercentage = 0.8;
                    if (((double) pixelsInMaxRow / (double) shape.getHeight()) >= minPercentage) {
                        RowOfShapes newRow = newRows.get(maxPixelRowIndex);
                        newRow.addShape(shape);
                    } else {
                        // otherwise, the shape needs to be got rid of
                        // as it's causing massive confusion
                        // do this by simply not adding it anywhere
                    }
                } // is the shape in one row exactly?
            } // next shape
            splitRows.put(row, newRows);
        } // do we have more than one row?
    } // next row

    for (RowOfShapes row : splitRows.keySet()) {
        List<RowOfShapes> newRows = splitRows.get(row);
        sourceImage.replaceRow(row, newRows);
    }
}

From source file:ch.ethz.bsse.quasirecomb.model.Preprocessing.java

private static void computeInsertDist(Read[] reads) {
    List<Integer> l = new LinkedList<>();
    StringBuilder insertSB = new StringBuilder();
    int x = 0;
    for (Read r : reads) {
        if (r.isPaired()) {
            l.add(r.getCrickBegin() - r.getWatsonEnd());
            //                inserts[x++] = ;
            Globals.getINSTANCE().incPAIRED();
        } else if (r.isMerged()) {
            Globals.getINSTANCE().incMERGED();
        }
        for (int i = 0; i < r.getCount(); i++) {
            insertSB.append(r.getInsertion()).append("\n");
        }
    }
    double[] inserts = new double[Globals.getINSTANCE().getPAIRED_COUNT()];
    for (Integer i : l) {
        inserts[x++] = i;
    }
    StatusUpdate.getINSTANCE().println("Insert size\t" + Math.round((new Mean().evaluate(inserts)) * 10) / 10
            + " (" + Math.round(new StandardDeviation().evaluate(inserts) * 10) / 10 + ")");
    Utils.saveFile(Globals.getINSTANCE().getSAVEPATH() + "support" + File.separator + "insertSize.txt",
            insertSB.toString());
}

From source file:com.joliciel.jochre.graphics.SegmenterImpl.java

void removeOversizedShapes(List<Shape> shapes) {
    LOG.debug("########## removeOversizedShapes #########");
    Mean shapeHeightMean = new Mean();
    Mean shapeWidthMean = new Mean();

    for (Shape shape : shapes) {
        shapeHeightMean.increment(shape.getHeight());
        shapeWidthMean.increment(shape.getWidth());
    }

    double heightMean = shapeHeightMean.getResult();
    double widthMean = shapeWidthMean.getResult();
    LOG.debug("heightMean: " + heightMean);
    LOG.debug("widthMean: " + widthMean);

    shapeHeightMean = new Mean();
    shapeWidthMean = new Mean();
    StandardDeviation shapeHeightStdDev = new StandardDeviation();
    for (Shape shape : shapes) {
        if (shape.getHeight() > heightMean && shape.getHeight() < (heightMean * 2.0)
                && shape.getWidth() > widthMean && shape.getWidth() < (widthMean * 2.0)) {
            shapeHeightMean.increment(shape.getHeight());
            shapeHeightStdDev.increment(shape.getHeight());
            shapeWidthMean.increment(shape.getWidth());
        }
    }

    heightMean = shapeHeightMean.getResult();
    widthMean = shapeWidthMean.getResult();
    LOG.debug("average shape heightMean: " + heightMean);
    LOG.debug("average shape widthMean: " + widthMean);

    double minHeightBigShape = heightMean * 6;
    double minWidthWideShape = widthMean * 6;
    double minHeightWideShape = heightMean * 1.5;
    double minHeightTallShape = heightMean * 2.5;
    double maxWidthTallShape = widthMean / 2;
    LOG.debug("minHeightBigShape: " + minHeightBigShape);
    LOG.debug("minWidthWideShape: " + minWidthWideShape);
    LOG.debug("minHeightWideShape: " + minHeightWideShape);
    LOG.debug("minHeightTallShape: " + minHeightTallShape);
    LOG.debug("maxWidthTallShape: " + maxWidthTallShape);

    List<Shape> largeShapes = new ArrayList<Shape>();
    List<Shape> horizontalRules = new ArrayList<Shape>();
    for (Shape shape : shapes) {
        if (shape.getHeight() > minHeightBigShape) {
            LOG.debug("Removing " + shape + " (height)");
            largeShapes.add(shape);
        } else if (shape.getWidth() > minWidthWideShape && shape.getHeight() > minHeightWideShape) {
            // we don't want to remove horizontal bars, but we do want to remove other shapes.
            // why not? I suppose horizontal bars are easily represented as characters?
            LOG.debug("Removing " + shape + " (width)");
            largeShapes.add(shape);
        } else if (shape.getWidth() > minWidthWideShape) {
            // ok, we will remove horizontal rules after all
            LOG.debug("Removing " + shape + " (horizontal rule)");
            largeShapes.add(shape);
            horizontalRules.add(shape);
        } else if (shape.getWidth() <= maxWidthTallShape && shape.getHeight() > minHeightTallShape) {
            LOG.debug("Removing " + shape + " (narrow)");
            largeShapes.add(shape);
        }
    }

    // Only want to remove enclosed shapes if the large shape isn't a frame/grid
    // A) first reduce the shape by 5 percent and see if its cardinality reduces vastly (in which case it's a frame)
    // if so, don't remove enclosed shapes
    // B) next, detect white rectangles within the shape - if they're big enough, don't remove enclosed shapes
    LOG.debug("Are large shapes frames or illustrations?");
    double maxFrameCardinalityRatio = 0.5;
    double minFrameWhiteAreaSizeRatio = 0.9;
    List<Shape> illustrations = new ArrayList<Shape>(largeShapes);
    for (Shape largeShape : largeShapes) {
        LOG.debug(largeShape.toString());
        int xOrigin = largeShape.getStartingPoint()[0] - largeShape.getLeft();
        int yOrigin = largeShape.getStartingPoint()[1] - largeShape.getTop();
        Shape dummyShape = graphicsService.getDot(sourceImage, xOrigin, yOrigin);
        // We want to fill up a mirror of the contiguous pixels within this shape,
        // which is what we'll use for further analysis to know
        // if it's a frame or not.
        WritableImageGrid mirror = graphicsService.getEmptyMirror(largeShape);
        this.findContiguousPixels(largeShape, mirror, dummyShape, xOrigin, yOrigin,
                sourceImage.getSeparationThreshold());

        int adjustedLeft = (int) Math.round((double) mirror.getWidth() * 0.05);
        int adjustedRight = (int) Math.round((double) mirror.getWidth() * 0.95);
        int adjustedTop = (int) Math.round((double) mirror.getHeight() * 0.05);
        int adjustedBottom = (int) Math.round((double) mirror.getHeight() * 0.95);

        int cardinality = 0;
        int innerCardinality = 0;
        for (int x = 0; x < mirror.getWidth(); x++) {
            for (int y = 0; y < mirror.getHeight(); y++) {
                if (mirror.getPixel(x, y) > 0) {
                    cardinality++;
                    if (x >= adjustedLeft && x <= adjustedRight && y >= adjustedTop && y <= adjustedBottom)
                        innerCardinality++;
                }
            }
        }

        LOG.debug("cardinality: " + cardinality);
        LOG.debug("innerCardinality: " + innerCardinality);
        double ratio = (double) innerCardinality / (double) cardinality;
        LOG.debug("ratio: " + ratio);
        if (ratio <= maxFrameCardinalityRatio) {
            LOG.debug("maxFrameCardinalityRatio: " + maxFrameCardinalityRatio);
            LOG.debug("Frame by cardinality! Removing from illustrations");
            illustrations.remove(largeShape);
        } else {
            // Now, it could still be a grid
            // to find this out we need to detect white areas inside the shape.
            WhiteAreaFinder whiteAreaFinder = new WhiteAreaFinder();
            double minWhiteAreaWidth = widthMean * 10;
            double minWhiteAreaHeight = heightMean * 4;
            List<Rectangle> whiteAreas = whiteAreaFinder.getWhiteAreas(mirror, 0, 0, 0, mirror.getWidth() - 1,
                    mirror.getHeight() - 1, minWhiteAreaWidth, minWhiteAreaHeight);
            int whiteAreaSize = 0;
            for (Rectangle whiteArea : whiteAreas) {
                whiteAreaSize += (whiteArea.getWidth() * whiteArea.getHeight());
            }

            int totalSize = mirror.getWidth() * mirror.getHeight();
            LOG.debug("whiteAreaSize: " + whiteAreaSize);
            LOG.debug("totalSize: " + totalSize);

            double sizeRatio = (double) whiteAreaSize / (double) totalSize;
            LOG.debug("sizeRatio: " + sizeRatio);

            if (sizeRatio >= minFrameWhiteAreaSizeRatio) {
                LOG.debug("minFrameWhiteAreaSizeRatio: " + minFrameWhiteAreaSizeRatio);
                LOG.debug("Frame by white area size! Removing from illustrations");
                illustrations.remove(largeShape);
            }

        }
    }

    for (Shape largeShape : illustrations) {
        // Add this to large shapes if it's not a "frame"
        // large shapes are used for paragraph detection
        sourceImage.getLargeShapes().add(largeShape);
    }

    // remove shapes that are enclosed inside illustrations
    List<Shape> enclosedShapesToDelete = new ArrayList<Shape>();
    int extension = 5;
    for (Shape shape : shapes) {
        for (Shape shapeToDelete : illustrations) {
            if (shape.getLeft() >= shapeToDelete.getLeft() - extension
                    && shape.getRight() <= shapeToDelete.getRight() + extension
                    && shape.getTop() >= shapeToDelete.getTop() - extension
                    && shape.getBottom() <= shapeToDelete.getBottom() + extension) {
                LOG.debug("Enclosed shape: " + shape);
                LOG.debug(" enclosed by " + shapeToDelete);
                enclosedShapesToDelete.add(shape);
            }
        }
    }

    shapes.removeAll(largeShapes);
    shapes.removeAll(enclosedShapesToDelete);

    // remove shapes that are practically touching horizontal rules (probably segments of the rule that got split)
    extension = 3;
    List<Shape> listToTestAgainst = horizontalRules;
    for (int i = 0; i < 3; i++) {
        List<Shape> horizontalRuleSegments = new ArrayList<Shape>();
        for (Shape horizontalRule : listToTestAgainst) {
            for (Shape shape : shapes) {
                if ((shape.getLeft() <= horizontalRule.getRight() + extension
                        || shape.getRight() >= horizontalRule.getLeft() - extension)
                        && shape.getTop() >= horizontalRule.getTop() - extension
                        && shape.getBottom() <= horizontalRule.getBottom() + extension) {
                    LOG.debug("Horizontal rule segment: " + shape);
                    LOG.debug(" touching " + horizontalRule);
                    horizontalRuleSegments.add(shape);
                    enclosedShapesToDelete.add(shape);
                }
            }
        }
        shapes.removeAll(horizontalRuleSegments);
        listToTestAgainst = horizontalRuleSegments;
        if (listToTestAgainst.size() == 0)
            break;
    }

}