Example usage for org.apache.commons.math.stat.descriptive.moment StandardDeviation StandardDeviation

Introduction

On this page you can find example usages of the no-argument constructor of org.apache.commons.math.stat.descriptive.moment.StandardDeviation.

Prototype

public StandardDeviation()

Source Link

Document

Constructs a StandardDeviation.

Usage

From source file:de.tudarmstadt.ukp.dkpro.tc.mallet.report.MalletBatchCrossValidationReport.java

/**
 * Builds one overview row per cross-validation subtask by aggregating the per-fold evaluation
 * CSV of that subtask, then logs the overview and stores it as CSV (and as Excel when the
 * table has at most 255 columns), once in compact and once in full form.
 * <p>
 * Per column of the evaluation file: if every cell parses as a double, the column is reduced to
 * a sum (for headers listed in {@code nonAveragedResultsMeasures}) or to
 * "mean&plusmn;standard deviation"; otherwise it is reported as the single shared value, or as
 * "---" when the cells differ. The literal cell "null" counts as 0.0.
 *
 * @throws Exception
 *             if reading the stored data or writing the reports fails
 */
@Override
public void execute() throws Exception {
    StorageService store = getContext().getStorageService();

    FlexTable<String> table = FlexTable.forClass(String.class);

    Map<String, List<Double>> key2resultValues = new HashMap<String, List<Double>>();

    for (TaskContextMetadata subcontext : getSubtasks()) {
        String name = BatchTask.class.getSimpleName() + "CrossValidation";
        // one CV batch (which internally ran numFolds times)
        if (!subcontext.getLabel().startsWith(name)) {
            continue;
        }

        Map<String, String> discriminatorsMap = store
                .retrieveBinary(subcontext.getId(), Task.DISCRIMINATORS_KEY, new PropertiesAdapter())
                .getMap();

        File eval = store.getStorageFolder(subcontext.getId(), EVAL_FILE_NAME + SUFFIX_CSV);

        Map<String, String> resultMap = new HashMap<String, String>();

        // Read the file once; row 0 is the header, every further row is one fold.
        List<String> lines = FileUtils.readLines(eval);
        String[][] evalMatrix = new String[lines.size()][];
        for (int i = 0; i < lines.size(); i++) {
            evalMatrix[i] = StrTokenizer.getCSVInstance(lines.get(i)).getTokenArray();
        }
        // An empty file has no header row, hence no columns to aggregate.
        int columnCount = evalMatrix.length == 0 ? 0 : evalMatrix[0].length;

        // columns
        for (int j = 0; j < columnCount; j++) {
            String header = evalMatrix[0][j];
            String[] vals = new String[Math.max(0, evalMatrix.length - 1)];
            // rows
            for (int k = 1; k < evalMatrix.length; k++) {
                String cell = evalMatrix[k][j];
                vals[k - 1] = "null".equals(cell) ? String.valueOf(0.) : cell;
            }
            Mean mean = new Mean();
            Sum sum = new Sum();
            StandardDeviation std = new StandardDeviation();

            // A column is numeric only if every cell parses; the distinct raw values are
            // tracked independently so that a mixed column can still be reported.
            double[] dVals = new double[vals.length];
            Set<String> sVals = new HashSet<String>();
            boolean numeric = true;
            for (int k = 0; k < vals.length; k++) {
                sVals.add(vals[k]);
                if (numeric) {
                    try {
                        dVals[k] = Double.parseDouble(vals[k]);
                    } catch (NumberFormatException e) {
                        numeric = false;
                    }
                }
            }

            if (numeric) {
                if (nonAveragedResultsMeasures.contains(header)) {
                    resultMap.put(header, String.valueOf(sum.evaluate(dVals)));
                } else {
                    resultMap.put(header, String.valueOf(mean.evaluate(dVals)) + "\u00B1"
                            + String.valueOf(std.evaluate(dVals)));
                }
            } else if (sVals.size() > 1) {
                // folds disagree on a non-numeric value
                resultMap.put(header, "---");
            } else {
                // non-numeric implies at least one cell, so vals[0] exists
                resultMap.put(header, vals[0]);
            }
        }

        String key = getKey(discriminatorsMap);

        List<Double> results = key2resultValues.get(key);
        if (results == null) {
            results = new ArrayList<Double>();
            key2resultValues.put(key, results);
        }

        Map<String, String> values = new HashMap<String, String>();
        Map<String, String> cleanedDiscriminatorsMap = new HashMap<String, String>();

        for (String disc : discriminatorsMap.keySet()) {
            if (!ReportUtils.containsExcludePattern(disc, discriminatorsToExclude)) {
                cleanedDiscriminatorsMap.put(disc, discriminatorsMap.get(disc));
            }
        }
        values.putAll(cleanedDiscriminatorsMap);
        values.putAll(resultMap);

        table.addRow(subcontext.getLabel(), values);
    }

    getContext().getLoggingService().message(getContextLabel(), ReportUtils.getPerformanceOverview(table));

    // Excel cannot cope with more than 255 columns
    if (table.getColumnIds().length <= 255) {
        getContext().storeBinary(EVAL_FILE_NAME + "_compact" + SUFFIX_EXCEL, table.getExcelWriter());
    }
    getContext().storeBinary(EVAL_FILE_NAME + "_compact" + SUFFIX_CSV, table.getCsvWriter());

    table.setCompact(false);
    // Excel cannot cope with more than 255 columns
    if (table.getColumnIds().length <= 255) {
        getContext().storeBinary(EVAL_FILE_NAME + SUFFIX_EXCEL, table.getExcelWriter());
    }
    getContext().storeBinary(EVAL_FILE_NAME + SUFFIX_CSV, table.getCsvWriter());

    // output the location of the batch evaluation folder
    // otherwise it might be hard for novice users to locate this
    File dummyFolder = store.getStorageFolder(getContext().getId(), "dummy");
    // TODO can we also do this without creating and deleting the dummy folder?
    getContext().getLoggingService().message(getContextLabel(),
            "Storing detailed results in:\n" + dummyFolder.getParent() + "\n");
    dummyFolder.delete();
}

From source file:de.tudarmstadt.ukp.dkpro.tc.crfsuite.CRFSuiteBatchCrossValidationReport.java

/**
 * Builds one overview row per cross-validation subtask by aggregating the per-fold evaluation
 * CSV of that subtask, then logs the overview and stores it as CSV (and as Excel when the
 * table has at most 255 columns), once in compact and once in full form.
 * <p>
 * Per column of the evaluation file: if every cell parses as a double, the column is reduced to
 * a sum (key {@code header + foldSum}, for headers listed in
 * {@code nonAveragedResultsMeasures}) or to "mean&plusmn;standard deviation" (key
 * {@code header + foldAveraged}); otherwise it is reported under the plain header as the single
 * shared value, or as "---" when the cells differ. The literal cell "null" counts as 0.0.
 *
 * @throws Exception
 *             if reading the stored data or writing the reports fails
 */
@Override
public void execute() throws Exception {
    StorageService store = getContext().getStorageService();

    FlexTable<String> table = FlexTable.forClass(String.class);

    Map<String, List<Double>> key2resultValues = new HashMap<String, List<Double>>();

    for (TaskContextMetadata subcontext : getSubtasks()) {
        String name = ExperimentCrossValidation.class.getSimpleName();
        // one CV batch (which internally ran numFolds times)
        if (!subcontext.getLabel().startsWith(name)) {
            continue;
        }

        Map<String, String> discriminatorsMap = store
                .retrieveBinary(subcontext.getId(), Task.DISCRIMINATORS_KEY, new PropertiesAdapter())
                .getMap();

        File eval = store.getStorageFolder(subcontext.getId(), EVAL_FILE_NAME + SUFFIX_CSV);

        Map<String, String> resultMap = new HashMap<String, String>();

        // Read the file once; row 0 is the header, every further row is one fold.
        List<String> lines = FileUtils.readLines(eval);
        String[][] evalMatrix = new String[lines.size()][];
        for (int i = 0; i < lines.size(); i++) {
            evalMatrix[i] = StrTokenizer.getCSVInstance(lines.get(i)).getTokenArray();
        }
        // An empty file has no header row, hence no columns to aggregate.
        int columnCount = evalMatrix.length == 0 ? 0 : evalMatrix[0].length;

        // columns
        for (int j = 0; j < columnCount; j++) {
            String header = evalMatrix[0][j];
            String[] vals = new String[Math.max(0, evalMatrix.length - 1)];
            // rows
            for (int k = 1; k < evalMatrix.length; k++) {
                String cell = evalMatrix[k][j];
                vals[k - 1] = "null".equals(cell) ? String.valueOf(0.) : cell;
            }
            Mean mean = new Mean();
            Sum sum = new Sum();
            StandardDeviation std = new StandardDeviation();

            // A column is numeric only if every cell parses; the distinct raw values are
            // tracked independently so that a mixed column can still be reported.
            double[] dVals = new double[vals.length];
            Set<String> sVals = new HashSet<String>();
            boolean numeric = true;
            for (int k = 0; k < vals.length; k++) {
                sVals.add(vals[k]);
                if (numeric) {
                    try {
                        dVals[k] = Double.parseDouble(vals[k]);
                    } catch (NumberFormatException e) {
                        numeric = false;
                    }
                }
            }

            if (numeric) {
                if (nonAveragedResultsMeasures.contains(header)) {
                    resultMap.put(header + foldSum, String.valueOf(sum.evaluate(dVals)));
                } else {
                    resultMap.put(header + foldAveraged,
                            mean.evaluate(dVals) + "\u00B1" + std.evaluate(dVals));
                }
            } else if (sVals.size() > 1) {
                // folds disagree on a non-numeric value
                resultMap.put(header, "---");
            } else {
                // non-numeric implies at least one cell, so vals[0] exists
                resultMap.put(header, vals[0]);
            }
        }

        String key = getKey(discriminatorsMap);

        List<Double> results = key2resultValues.get(key);
        if (results == null) {
            results = new ArrayList<Double>();
            key2resultValues.put(key, results);
        }

        Map<String, String> values = new HashMap<String, String>();
        Map<String, String> cleanedDiscriminatorsMap = new HashMap<String, String>();

        for (String disc : discriminatorsMap.keySet()) {
            if (!ReportUtils.containsExcludePattern(disc, discriminatorsToExclude)) {
                cleanedDiscriminatorsMap.put(disc, discriminatorsMap.get(disc));
            }
        }
        values.putAll(cleanedDiscriminatorsMap);
        values.putAll(resultMap);

        table.addRow(subcontext.getLabel(), values);
    }

    getContext().getLoggingService().message(getContextLabel(), ReportUtils.getPerformanceOverview(table));
    // Excel cannot cope with more than 255 columns
    if (table.getColumnIds().length <= 255) {
        getContext().storeBinary(EVAL_FILE_NAME + "_compact" + SUFFIX_EXCEL, table.getExcelWriter());
    }
    getContext().storeBinary(EVAL_FILE_NAME + "_compact" + SUFFIX_CSV, table.getCsvWriter());

    table.setCompact(false);
    // Excel cannot cope with more than 255 columns
    if (table.getColumnIds().length <= 255) {
        getContext().storeBinary(EVAL_FILE_NAME + SUFFIX_EXCEL, table.getExcelWriter());
    }
    getContext().storeBinary(EVAL_FILE_NAME + SUFFIX_CSV, table.getCsvWriter());

    // output the location of the batch evaluation folder
    // otherwise it might be hard for novice users to locate this
    File dummyFolder = store.getStorageFolder(getContext().getId(), "dummy");
    // TODO can we also do this without creating and deleting the dummy folder?
    getContext().getLoggingService().message(getContextLabel(),
            "Storing detailed results in:\n" + dummyFolder.getParent() + "\n");
    dummyFolder.delete();
}

From source file:cerrla.Performance.java

/**
 * Records performance scores using sliding windows of results.
 * /* ww  w  .j a  v  a  2  s . c  o m*/
 * @param currentEpisode
 *            The current episode.
 */
public void recordPerformanceScore(int currentEpisode) {
    if (recentScores_.isEmpty())
        return;
    // Transform the queues into arrays
    double[] vals = new double[recentScores_.size()];
    int i = 0;
    for (Double val : recentScores_)
        vals[i++] = val.doubleValue();
    double[] envSDs = new double[internalSDs_.size()];
    i = 0;
    for (Double envSD : internalSDs_)
        envSDs[i++] = envSD.doubleValue();

    Mean m = new Mean();
    StandardDeviation sd = new StandardDeviation();
    double mean = m.evaluate(vals);
    double meanDeviation = sd.evaluate(envSDs) * CONVERGENCE_PERCENT_BUFFER;

    Double[] details = new Double[PerformanceDetails.values().length];
    details[PerformanceDetails.EPISODE.ordinal()] = Double.valueOf(currentEpisode);
    details[PerformanceDetails.MEAN.ordinal()] = mean;
    details[PerformanceDetails.SD.ordinal()] = sd.evaluate(vals);
    performanceDetails_.put(currentEpisode, details);

    // Output current means
    if (ProgramArgument.SYSTEM_OUTPUT.booleanValue() && !frozen_) {
        DecimalFormat formatter = new DecimalFormat("#0.00");
        String meanString = formatter.format(mean);
        String sdString = formatter.format(meanDeviation);
        System.out.println("Average performance: " + meanString + " " + SD_SYMBOL + " " + sdString);
    }
    if (frozen_) {
        System.out.println(currentEpisode + ": " + details[PerformanceDetails.MEAN.ordinal()]);
    }
}

From source file:jsprit.core.algorithm.acceptor.ExperimentalSchrimpfAcceptance.java

/**
 * Determines the initial acceptance threshold before the actual search starts: runs the
 * algorithm configured in randomWalk.xml for {@code nOfRandomWalks} iterations, records the
 * best solution cost after each iteration and sets the threshold to half the standard deviation
 * of those costs.
 */
@Override
public void informAlgorithmStarts(VehicleRoutingProblem problem, VehicleRoutingAlgorithm algorithm,
        Collection<VehicleRoutingProblemSolution> solutions) {
    reset();
    logger.info("---------------------------------------------------------------------");
    logger.info("prepare schrimpfAcceptanceFunction, i.e. determine initial threshold");
    logger.info("start random-walk (see randomWalk.xml)");
    double startMillis = System.currentTimeMillis();
    this.nOfTotalIterations = algorithm.getMaxIterations();

    // one cost sample per random-walk iteration
    final double[] walkCosts = new double[nOfRandomWalks];

    // build the random-walk algorithm from its bundled configuration
    URL configUrl = Resource.getAsURL("randomWalk.xml");
    AlgorithmConfig walkConfig = new AlgorithmConfig();
    AlgorithmConfigXmlReader configReader = new AlgorithmConfigXmlReader(walkConfig);
    configReader.read(configUrl);
    VehicleRoutingAlgorithm randomWalk = VehicleRoutingAlgorithms.createAlgorithm(problem, walkConfig);
    randomWalk.setMaxIterations(nOfRandomWalks);

    IterationEndsListener costRecorder = new IterationEndsListener() {

        @Override
        public void informIterationEnds(int iteration, VehicleRoutingProblem problem,
                Collection<VehicleRoutingProblemSolution> solutions) {
            // the index implies iterations are reported 1-based
            walkCosts[iteration - 1] = Solutions.bestOf(solutions).getCost();
        }

    };
    randomWalk.getAlgorithmListeners().addListener(costRecorder);
    randomWalk.searchSolutions();

    initialThreshold = new StandardDeviation().evaluate(walkCosts) / 2;

    logger.info("warmup done");
    logger.info("total time: {}s", ((System.currentTimeMillis() - startMillis) / 1000.0));
    logger.info("initial threshold: {}", initialThreshold);
    logger.info("---------------------------------------------------------------------");

}

From source file:net.sf.katta.tool.loadtest.LoadTestMasterOperation.java

/**
 * Collects the query results of all nodes for the current iteration, appends the per-query
 * statistics and the per-iteration summary (mean and standard deviation of the elapsed time of
 * successful queries) to the two log files in {@code _resultDir}, and then either queues the
 * next iteration or finishes the load test by removing its flag.
 * <p>
 * Any failure ends the load test: the error is logged and the flag is removed. Both writers are
 * closed whether or not writing succeeded.
 *
 * @param context
 *            the master context used to re-queue this operation or to remove the flag
 * @param nodeResults
 *            one result per node; a {@code null} entry or an entry carrying an unhandled
 *            exception aborts the load test
 */
@Override
public void nodeOperationsComplete(MasterContext context, List<OperationResult> nodeResults) throws Exception {
    try {
        final int queryRate = calculateCurrentQueryRate();
        LOG.info("collecting results for iteration " + _currentIteration + " and query rate " + queryRate
                + " after " + (System.currentTimeMillis() - _currentIterationStartTime) + " ms ...");
        List<LoadTestQueryResult> queryResults = new ArrayList<LoadTestQueryResult>();
        for (OperationResult operationResult : nodeResults) {
            if (operationResult == null || operationResult.getUnhandledException() != null) {
                Exception rootException = null;
                if (operationResult != null) {
                    rootException = operationResult.getUnhandledException();
                }
                throw new IllegalStateException(
                        "at least one node operation did not completed properly: " + nodeResults,
                        rootException);
            }
            LoadTestNodeOperationResult nodeOperationResult = (LoadTestNodeOperationResult) operationResult;
            queryResults.addAll(nodeOperationResult.getQueryResults());
        }
        LOG.info("Received " + queryResults.size() + " queries, expected " + queryRate * _runTime / 1000);

        File statisticsFile = new File(_resultDir, "load-test-log-" + _startTime + ".log");
        File resultsFile = new File(_resultDir, "load-test-results-" + _startTime + ".log");
        Writer statisticsWriter = null;
        Writer resultWriter = null;
        try {
            // both files are opened in append mode so that iterations accumulate
            statisticsWriter = new OutputStreamWriter(new FileOutputStream(statisticsFile, true));
            resultWriter = new OutputStreamWriter(new FileOutputStream(resultsFile, true));
            if (_currentIteration == 0) {
                // print headers
                statisticsWriter.append("#queryRate \tnode \tstartTime \tendTime \telapseTime \tquery \n");
                resultWriter.append(
                        "#requestedQueryRate \tachievedQueryRate \tfiredQueries \tqueryErrors \tavarageQueryDuration \tstandardDeviation  \n");
            }

            StorelessUnivariateStatistic timeStandardDeviation = new StandardDeviation();
            StorelessUnivariateStatistic timeMean = new Mean();
            int errors = 0;

            for (LoadTestQueryResult result : queryResults) {
                // an end time <= 0 marks a query that did not finish; it is counted as an error
                long elapsedTime = result.getEndTime() > 0 ? result.getEndTime() - result.getStartTime() : -1;
                statisticsWriter.write(queryRate + "\t" + result.getNodeId() + "\t" + result.getStartTime()
                        + "\t" + result.getEndTime() + "\t" + elapsedTime + "\t" + result.getQuery() + "\n");
                if (elapsedTime != -1) {
                    timeStandardDeviation.increment(elapsedTime);
                    timeMean.increment(elapsedTime);
                } else {
                    ++errors;
                }
            }
            resultWriter.write(queryRate + "\t" + ((double) queryResults.size() / (_runTime / 1000)) + "\t"
                    + queryResults.size() + "\t" + errors + "\t" + (int) timeMean.getResult() + "\t"
                    + (int) timeStandardDeviation.getResult() + "\n");
            LOG.info("results written to " + resultsFile.getAbsolutePath());
            LOG.info("statistics written to " + statisticsFile.getAbsolutePath());
        } catch (IOException e) {
            throw new IllegalStateException("Failed to write statistics data.", e);
        } finally {
            // close on every path so a failed write does not leak the file handles
            for (Writer writer : new Writer[] { statisticsWriter, resultWriter }) {
                if (writer == null) {
                    continue;
                }
                try {
                    writer.close();
                } catch (IOException e) {
                    LOG.warn("Failed to close statistics file.", e);
                }
            }
        }
        if (queryRate + _step <= _endRate) {
            _currentIteration++;
            LOG.info("triggering next iteration " + _currentIteration);
            context.getMasterQueue().add(this);
        } else {
            LOG.info("finish load test in iteration " + _currentIteration + " after "
                    + (System.currentTimeMillis() - _startTime) + " ms");
            context.getProtocol().removeFlag(getName());
        }
    } catch (Exception e) {
        // the load test cannot continue; report why before releasing the flag
        LOG.error("load test failed in iteration " + _currentIteration + ", aborting", e);
        context.getProtocol().removeFlag(getName());
    }
}

From source file:jCloisterZone.CarcassonneEnvironment.java

/**
 * Plays 100 games with one random AI player against legacy AI players, prints the random
 * player's score after each game and finally the mean and standard deviation of those scores.
 *
 * @param args
 *            {@code args[0]} is the total number of players (parsed as an integer)
 */
public static void main(String[] args) {
    final int repetitions = 100;
    double[] scores = new double[repetitions];

    RRLJCloisterClient client = new LocalCarcassonneClient("config.ini");
    ServerIF server = null;
    Game game = client.getGame();
    Player firstPlayer = null;
    ArrayList<PlayerSlot> slots = new ArrayList<PlayerSlot>();
    for (int round = 0; round < repetitions; round++) {
        client.createGame();
        if (game != null) {
            // Reset the UIs
            server.stopGame();
            game.clearUserInterface();

            // Clear the slots and re-add them.
            for (int slotNumber = 0; slotNumber < PlayerSlot.COUNT; slotNumber++) {
                server.updateSlot(new PlayerSlot(slotNumber), null);
            }
        } else {
            // First game: create the server and the player slots
            server = new LocalCarcassonneServer(client.getGame());
            PlayerSlot randomSlot = new PlayerSlot(0, PlayerSlot.SlotType.AI, "RANDOM" + 0,
                    client.getClientId());
            randomSlot.setAiClassName(RandomAIPlayer.class.getName());
            slots.add(randomSlot);
            for (int j = 1; j < Integer.parseInt(args[0]); j++) {
                PlayerSlot aiSlot = new PlayerSlot(j, PlayerSlot.SlotType.AI, "AI" + j, client.getClientId());
                aiSlot.setAiClassName(LegacyAiPlayer.class.getName());
                slots.add(aiSlot);
            }
            game = client.getGame();
        }

        // Randomise the seating, then register a renumbered copy of every slot
        Collections.shuffle(slots);
        int position = 0;
        while (position < slots.size()) {
            PlayerSlot template = slots.get(position);
            PlayerSlot seated = new PlayerSlot(position, template.getType(), template.getNick(),
                    template.getOwner());
            seated.setAiClassName(template.getAiClassName());
            server.updateSlot(seated, LegacyAiPlayer.supportedExpansions());
            position++;
        }

        server.startGame();

        Phase phase = game.getPhase();

        // Cycle through (probably only once) to keep the game moving.
        while (phase != null && !phase.isEntered()) {
            // Modifying phases to proxyless versions
            if (phase.getClass().equals(CreateGamePhase.class)) {
                phase = game.getPhases().get(ProxylessCreateGamePhase.class);
            }
            if (phase.getClass().equals(DrawPhase.class)) {
                phase = game.getPhases().get(ProxylessDrawPhase.class);
            }

            phase.setEntered(true);
            phase.enter();
            phase = game.getPhase();

            // Remember the random player whenever it is that player's turn
            if (game.getTurnPlayer().getNick().equals("RANDOM0")) {
                firstPlayer = game.getTurnPlayer();
            }
        }
        int score = firstPlayer.getPoints();
        scores[round] = score;
        System.out.println(score);
    }

    Mean scoreMean = new Mean();
    StandardDeviation scoreDeviation = new StandardDeviation();
    System.out.println("Mean: " + scoreMean.evaluate(scores) + ", SD: " + scoreDeviation.evaluate(scores));
}

From source file:com.joliciel.jochre.graphics.SourceImageImpl.java

/**
 * Returns the clusters of rows grouped by height, computing and caching them on first call.
 * <p>
 * The height of a row is taken from its first shape only (base line minus mean line). Rows are
 * clustered with DBSCAN on that single value, passing the standard deviation of all row heights
 * as the first argument of {@code cluster}. A row without shapes makes the iterator throw a
 * {@link java.util.NoSuchElementException}.
 *
 * @return the cached row clusters
 */
@Override
public Set<Set<RowOfShapes>> getRowClusters() {
    if (rowClusters == null) {
        // Take one snapshot of the rows so that rows.get(i) and rowHeights.get(i) are
        // guaranteed to describe the same row.
        List<RowOfShapes> rows = new ArrayList<RowOfShapes>(this.getRows());
        List<double[]> rowHeights = new ArrayList<double[]>(rows.size());
        StandardDeviation heightStdDev = new StandardDeviation();
        for (RowOfShapes row : rows) {
            Shape shape = row.getShapes().iterator().next();
            int height = shape.getBaseLine() - shape.getMeanLine();
            rowHeights.add(new double[] { height });
            heightStdDev.increment(height);
        }

        double stdDevHeight = heightStdDev.getResult();
        DBSCANClusterer<RowOfShapes> clusterer = new DBSCANClusterer<RowOfShapes>(rows, rowHeights);
        rowClusters = clusterer.cluster(stdDevHeight, 2, true);
        LOG.debug("Found " + rowClusters.size() + " row clusters.");
    }
    return rowClusters;
}

From source file:com.joliciel.jochre.graphics.SegmenterImpl.java

/**
 * Split rows if they're particularly high, and contain considerable white space in the middle.
 * <p>
 * Every row at least as high as the image's average shape height is a candidate. For a
 * candidate, black pixels are counted per horizontal line (corrected for the image's mean
 * horizontal slope); the row is split when the counts cross from above twice the mean to below
 * half the mean more than once. Each shape is then assigned to the new row that contains it
 * entirely, or to the new row holding at least 80% of its height; a shape matching neither is
 * dropped. Finally the original rows are replaced in the source image by their new rows.
 *
 * @param sourceImage the image whose rows are examined and, where required, replaced
 */
void splitRows(SourceImage sourceImage) {
    LOG.debug("########## splitRows #########");

    // Calculate the min row height to be considered for splitting
    double minHeightForSplit = sourceImage.getAverageShapeHeight();
    LOG.debug("minHeightForSplit: " + minHeightForSplit);

    double slopeMean = sourceImage.getMeanHorizontalSlope();

    // Collect the rows high enough to be split, ignoring rows of zero width
    List<RowOfShapes> candidateRows = new ArrayList<RowOfShapes>();
    for (RowOfShapes row : sourceImage.getRows()) {
        if (row.getRight() == row.getLeft())
            continue;
        int height = row.getBottom() - row.getTop();
        if (height >= minHeightForSplit) {
            LOG.debug("Adding candidate " + row.toString());
            candidateRows.add(row);
        }
    }

    // For each row to be considered for splitting, see if there are lines of white space inside it.
    Hashtable<RowOfShapes, List<RowOfShapes>> splitRows = new Hashtable<RowOfShapes, List<RowOfShapes>>();
    for (RowOfShapes row : candidateRows) {
        // Two-point regression describing the sloped top line of the row:
        // from (left, top) to (right, top + width * mean slope)
        SimpleRegression regression = new SimpleRegression();
        // y = intercept + slope * x
        LOG.debug("Left point: (" + row.getLeft() + " , " + row.getTop() + ")");
        regression.addData(row.getLeft(), row.getTop());
        double rightHandY = row.getTop() + ((double) (row.getRight() - row.getLeft()) * slopeMean);
        LOG.debug("Right point: (" + row.getRight() + " , " + rightHandY + ")");
        regression.addData(row.getRight(), rightHandY);

        // yDelta is the vertical drift of the top line across the row; the counting interval
        // is padded by yDelta both above and below the row's own height
        int yDelta = (int) Math.ceil(Math.abs(rightHandY - (double) row.getTop()));
        int yInterval = yDelta + (row.getBottom() - row.getTop() + 1) + yDelta;

        LOG.debug("yDelta: " + yDelta);
        LOG.debug("yInterval: " + yInterval);
        // let's get pixel counts shape by shape, and leave out the rest (in case rows overlap vertically)
        int[] pixelCounts = new int[yInterval];
        for (Shape shape : row.getShapes()) {
            LOG.trace("Shape " + shape);
            int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft()));
            LOG.trace("yDeltaAtLeft: " + yDeltaAtLeft);
            // the shape offset + the offset between the regression line and the row top
            // + the delta we left at the start in case the line slopes upwards to the right
            int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta;
            LOG.trace("topIndex: (" + shape.getTop() + " - " + row.getTop() + ") + (" + row.getTop() + " - "
                    + yDeltaAtLeft + ") + " + yDelta + " = " + topIndex);
            // NOTE(review): topIndex + y is not range-checked against yInterval - confirm it
            // cannot leave the array for shapes near the row's edges
            for (int x = 0; x < shape.getWidth(); x++) {
                for (int y = 0; y < shape.getHeight(); y++) {
                    if (shape.isPixelBlack(x, y, sourceImage.getBlackThreshold())) {
                        pixelCounts[topIndex + y]++;
                    }
                }
            }
        }

        // Mean and standard deviation of the per-line pixel counts; the standard deviation is
        // only logged, the thresholds below are derived from the mean
        Mean pixelCountMean = new Mean();
        StandardDeviation pixelCountStdDev = new StandardDeviation();
        for (int i = 0; i < yInterval; i++) {
            LOG.debug("Pixel count " + i + ": " + pixelCounts[i]);
            pixelCountMean.increment(pixelCounts[i]);
            pixelCountStdDev.increment(pixelCounts[i]);
        }
        LOG.debug("pixel count mean: " + pixelCountMean.getResult() + ", std dev: "
                + pixelCountStdDev.getResult());

        // If there's a split required, we're going to go considerably above and below the mean several times
        double lowThreshold = pixelCountMean.getResult() / 2.0;
        double highThreshold = pixelCountMean.getResult() * 2.0;
        boolean inRow = false;
        // line indexes at which we alternately enter (even positions) and leave (odd positions) a row
        List<Integer> switches = new ArrayList<Integer>();
        for (int i = 0; i < yInterval; i++) {
            if (!inRow && pixelCounts[i] > highThreshold) {
                LOG.debug("In row at " + i + ", pixel count " + pixelCounts[i]);
                inRow = true;
                switches.add(i);
            } else if (inRow && pixelCounts[i] < lowThreshold) {
                LOG.debug("Out of row at " + i + ", pixel count " + pixelCounts[i]);
                inRow = false;
                switches.add(i);
            }
        }
        if (switches.size() > 2) {
            // we have more than one row
            List<Integer> rowSeparations = new ArrayList<Integer>();

            // find the row separators
            for (int switchIndex = 1; switchIndex < switches.size() - 2; switchIndex = switchIndex + 2) {
                int outOfRow = switches.get(switchIndex);
                int intoRow = switches.get(switchIndex + 1);
                int minPixelCount = (int) Math.ceil(highThreshold);
                int minIndex = -1;
                // find the row with the lowest pixel count
                for (int i = outOfRow; i <= intoRow; i++) {
                    if (pixelCounts[i] < minPixelCount) {
                        minPixelCount = pixelCounts[i];
                        minIndex = i;
                    }
                }
                rowSeparations.add(minIndex);
            }

            // separate the shapes among the rows
            List<RowOfShapes> newRows = new ArrayList<RowOfShapes>(rowSeparations.size() + 1);
            for (int i = 0; i <= rowSeparations.size(); i++) {
                newRows.add(graphicsService.getEmptyRow(sourceImage));
            }

            // add a separator at the beginning and end
            rowSeparations.add(0, 0);
            rowSeparations.add(yInterval + 1);
            for (Shape shape : row.getShapes()) {
                // same index computation as when counting pixels above
                int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft()));
                int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta;
                int firstSepAfterShapeBottom = rowSeparations.size();
                int lastSepBeforeShapeTop = -1;

                // search backwards for the last separator at or above the shape's top
                for (int i = rowSeparations.size() - 1; i >= 0; i--) {
                    int rowSeparation = rowSeparations.get(i);
                    if (rowSeparation <= topIndex) {
                        lastSepBeforeShapeTop = i;
                        break;
                    }
                }

                // search forwards for the first separator at or below the shape's bottom
                for (int i = 0; i < rowSeparations.size(); i++) {
                    int rowSeparation = rowSeparations.get(i);
                    if (rowSeparation >= topIndex + shape.getHeight()) {
                        firstSepAfterShapeBottom = i;
                        break;
                    }
                }

                if (lastSepBeforeShapeTop == firstSepAfterShapeBottom - 1) {
                    // shape clearly belongs to one row
                    RowOfShapes newRow = newRows.get(lastSepBeforeShapeTop);
                    newRow.addShape(shape);
                } else {
                    // is the shape much closer to one row than another?
                    // if yes, add it to this row
                    // vertical overlap (in lines) between the shape and each new row
                    int[] yPixelsPerRow = new int[newRows.size()];
                    for (int i = 0; i < newRows.size(); i++) {
                        int separatorTop = rowSeparations.get(i);
                        int separatorBottom = rowSeparations.get(i + 1);
                        int top = topIndex < separatorTop ? separatorTop : topIndex;
                        int bottom = topIndex + shape.getHeight() < separatorBottom
                                ? topIndex + shape.getHeight()
                                : separatorBottom;
                        yPixelsPerRow[i] = bottom - top;
                    }

                    // pick the new row with the largest overlap
                    int pixelsInMaxRow = 0;
                    int maxPixelRowIndex = -1;
                    for (int i = 0; i < newRows.size(); i++) {
                        if (yPixelsPerRow[i] > pixelsInMaxRow) {
                            pixelsInMaxRow = yPixelsPerRow[i];
                            maxPixelRowIndex = i;
                        }
                    }
                    // the best row must hold at least this fraction of the shape's height
                    double minPercentage = 0.8;
                    if (((double) pixelsInMaxRow / (double) shape.getHeight()) >= minPercentage) {
                        RowOfShapes newRow = newRows.get(maxPixelRowIndex);
                        newRow.addShape(shape);
                    } else {
                        // otherwise, the shape needs to be got rid of
                        // as it's causing massive confusion
                        // do this by simply not adding it anywhere
                    }
                } // is the shape in one row exactly?
            } // next shape
            splitRows.put(row, newRows);
        } // do we have more than one row?
    } // next row

    // replace each split row by its new rows, after all candidates have been analysed
    for (RowOfShapes row : splitRows.keySet()) {
        List<RowOfShapes> newRows = splitRows.get(row);
        sourceImage.replaceRow(row, newRows);
    }
}

From source file:ch.ethz.bsse.quasirecomb.model.Preprocessing.java

/**
 * Reports insert-size statistics for the given reads and saves the per-read insertion values.
 * <p>
 * For every paired read the insert size {@code getCrickBegin() - getWatsonEnd()} is collected and
 * the global paired counter is incremented; merged reads only increment the global merged
 * counter. Each read's {@code getInsertion()} value is written {@code getCount()} times (one per
 * line) to {@code <SAVEPATH>support/insertSize.txt}. The mean and standard deviation of the
 * collected insert sizes, rounded to one decimal place, are printed through {@code StatusUpdate}.
 *
 * @param reads the reads to analyse
 */
private static void computeInsertDist(Read[] reads) {
    List<Integer> insertSizes = new LinkedList<>();
    StringBuilder insertSB = new StringBuilder();
    for (Read r : reads) {
        if (r.isPaired()) {
            insertSizes.add(r.getCrickBegin() - r.getWatsonEnd());
            Globals.getINSTANCE().incPAIRED();
        } else if (r.isMerged()) {
            Globals.getINSTANCE().incMERGED();
        }
        for (int i = 0; i < r.getCount(); i++) {
            insertSB.append(r.getInsertion()).append("\n");
        }
    }

    // Size the array from what was collected here rather than from the global paired counter:
    // if that counter ever differs from this list (e.g. a second invocation), the array would
    // either be zero-padded, skewing the statistics, or too small, causing an index error.
    double[] inserts = new double[insertSizes.size()];
    int x = 0;
    for (Integer insertSize : insertSizes) {
        inserts[x++] = insertSize;
    }

    // Divide by 10.0, not 10: Math.round returns a long, so "/ 10" would be integer division
    // and drop the decimal place that the "* 10" was meant to keep.
    double mean = Math.round(new Mean().evaluate(inserts) * 10) / 10.0;
    double stdDev = Math.round(new StandardDeviation().evaluate(inserts) * 10) / 10.0;
    StatusUpdate.getINSTANCE().println("Insert size\t" + mean + " (" + stdDev + ")");
    Utils.saveFile(Globals.getINSTANCE().getSAVEPATH() + "support" + File.separator + "insertSize.txt",
            insertSB.toString());
}

From source file:com.joliciel.jochre.graphics.SegmenterImpl.java

/**
 * Removes oversized shapes, shapes enclosed by illustrations, and horizontal-rule fragments from
 * {@code shapes} (the list is modified in place).
 * <p>
 * Processing order:
 * <ol>
 * <li>Compute the mean height and width over all shapes, then recompute both means using only
 * shapes whose height and width each lie strictly between the first mean and twice that mean.</li>
 * <li>Mark a shape as "large" when it is taller than 6x the mean height, or wider than 6x the mean
 * width, or at most half the mean width while taller than 2.5x the mean height. Wide shapes that
 * are not taller than 1.5x the mean height are additionally recorded as horizontal rules.</li>
 * <li>Decide for each large shape whether it is a frame/grid or an illustration, using the share
 * of its pixels lying in the central 5%-95% box and, failing that, the share of its area covered
 * by large white rectangles. Illustrations are added to {@code sourceImage.getLargeShapes()}.</li>
 * <li>Remove all large shapes, and every shape lying within an illustration's bounding box
 * extended by 5 pixels.</li>
 * <li>In up to three passes, remove remaining shapes that lie next to a horizontal rule (or next
 * to a segment removed in the previous pass).</li>
 * </ol>
 *
 * @param shapes the shapes to filter; matching shapes are removed from this list
 */
void removeOversizedShapes(List<Shape> shapes) {
    LOG.debug("########## removeOversizedShapes #########");
    Mean shapeHeightMean = new Mean();
    Mean shapeWidthMean = new Mean();

    for (Shape shape : shapes) {
        shapeHeightMean.increment(shape.getHeight());
        shapeWidthMean.increment(shape.getWidth());
    }

    double heightMean = shapeHeightMean.getResult();
    double widthMean = shapeWidthMean.getResult();
    LOG.debug("heightMean: " + heightMean);
    LOG.debug("widthMean: " + widthMean);

    // Second pass: average only over shapes between 1x and 2x the overall mean in both dimensions.
    shapeHeightMean = new Mean();
    shapeWidthMean = new Mean();
    for (Shape shape : shapes) {
        if (shape.getHeight() > heightMean && shape.getHeight() < (heightMean * 2.0)
                && shape.getWidth() > widthMean && shape.getWidth() < (widthMean * 2.0)) {
            shapeHeightMean.increment(shape.getHeight());
            shapeWidthMean.increment(shape.getWidth());
        }
    }

    heightMean = shapeHeightMean.getResult();
    widthMean = shapeWidthMean.getResult();
    LOG.debug("average shape heightMean: " + heightMean);
    LOG.debug("average shape widthMean: " + widthMean);

    double minHeightBigShape = heightMean * 6;
    double minWidthWideShape = widthMean * 6;
    double minHeightWideShape = heightMean * 1.5;
    double minHeightTallShape = heightMean * 2.5;
    double maxWidthTallShape = widthMean / 2;
    LOG.debug("minHeightBigShape: " + minHeightBigShape);
    LOG.debug("minWidthWideShape: " + minWidthWideShape);
    LOG.debug("minHeightWideShape: " + minHeightWideShape);
    LOG.debug("minHeightTallShape: " + minHeightTallShape);
    LOG.debug("maxWidthTallShape: " + maxWidthTallShape);

    List<Shape> largeShapes = new ArrayList<Shape>();
    List<Shape> horizontalRules = new ArrayList<Shape>();
    for (Shape shape : shapes) {
        if (shape.getHeight() > minHeightBigShape) {
            LOG.debug("Removing " + shape + " (height)");
            largeShapes.add(shape);
        } else if (shape.getWidth() > minWidthWideShape && shape.getHeight() > minHeightWideShape) {
            // wide and reasonably tall: not a simple horizontal bar
            LOG.debug("Removing " + shape + " (width)");
            largeShapes.add(shape);
        } else if (shape.getWidth() > minWidthWideShape) {
            // wide but flat: a horizontal rule, removed as well and remembered for the
            // rule-segment clean-up at the end of this method
            LOG.debug("Removing " + shape + " (horizontal rule)");
            largeShapes.add(shape);
            horizontalRules.add(shape);
        } else if (shape.getWidth() <= maxWidthTallShape && shape.getHeight() > minHeightTallShape) {
            LOG.debug("Removing " + shape + " (narrow)");
            largeShapes.add(shape);
        }
    }

    // Only want to remove enclosed shapes if the large shape isn't a frame/grid
    // A) first reduce the shape by 5 percent and see if its cardinality reduces vastly
    // (in which case it's a frame) - if so, don't remove enclosed shapes
    // B) next, detect white rectangles within the shape - if they're big enough,
    // don't remove enclosed shapes
    LOG.debug("Are large shapes frames or illustrations?");
    double maxFrameCardinalityRatio = 0.5;
    double minFrameWhiteAreaSizeRatio = 0.9;
    List<Shape> illustrations = new ArrayList<Shape>(largeShapes);
    for (Shape largeShape : largeShapes) {
        LOG.debug(largeShape.toString());
        int xOrigin = largeShape.getStartingPoint()[0] - largeShape.getLeft();
        int yOrigin = largeShape.getStartingPoint()[1] - largeShape.getTop();
        Shape dummyShape = graphicsService.getDot(sourceImage, xOrigin, yOrigin);
        // We want to fill up a mirror of the contiguous pixels within this shape,
        // which is what we'll use for further analysis to know
        // if it's a frame or not.
        WritableImageGrid mirror = graphicsService.getEmptyMirror(largeShape);
        this.findContiguousPixels(largeShape, mirror, dummyShape, xOrigin, yOrigin,
                sourceImage.getSeparationThreshold());

        // Bounds of the central box obtained by trimming 5% off each side of the mirror.
        int adjustedLeft = (int) Math.round((double) mirror.getWidth() * 0.05);
        int adjustedRight = (int) Math.round((double) mirror.getWidth() * 0.95);
        int adjustedTop = (int) Math.round((double) mirror.getHeight() * 0.05);
        int adjustedBottom = (int) Math.round((double) mirror.getHeight() * 0.95);

        int cardinality = 0;
        int innerCardinality = 0;
        for (int x = 0; x < mirror.getWidth(); x++) {
            for (int y = 0; y < mirror.getHeight(); y++) {
                if (mirror.getPixel(x, y) > 0) {
                    cardinality++;
                    if (x >= adjustedLeft && x <= adjustedRight && y >= adjustedTop && y <= adjustedBottom) {
                        innerCardinality++;
                    }
                }
            }
        }

        LOG.debug("cardinality: " + cardinality);
        LOG.debug("innerCardinality: " + innerCardinality);
        double ratio = (double) innerCardinality / (double) cardinality;
        LOG.debug("ratio: " + ratio);
        if (ratio <= maxFrameCardinalityRatio) {
            LOG.debug("maxFrameCardinalityRatio: " + maxFrameCardinalityRatio);
            LOG.debug("Frame by cardinality! Removing from illustrations");
            illustrations.remove(largeShape);
        } else {
            // Now, it could still be a grid
            // to find this out we need to detect white areas inside the shape.
            WhiteAreaFinder whiteAreaFinder = new WhiteAreaFinder();
            double minWhiteAreaWidth = widthMean * 10;
            double minWhiteAreaHeight = heightMean * 4;
            List<Rectangle> whiteAreas = whiteAreaFinder.getWhiteAreas(mirror, 0, 0, 0, mirror.getWidth() - 1,
                    mirror.getHeight() - 1, minWhiteAreaWidth, minWhiteAreaHeight);
            int whiteAreaSize = 0;
            for (Rectangle whiteArea : whiteAreas) {
                whiteAreaSize += (whiteArea.getWidth() * whiteArea.getHeight());
            }

            int totalSize = mirror.getWidth() * mirror.getHeight();
            LOG.debug("whiteAreaSize: " + whiteAreaSize);
            LOG.debug("totalSize: " + totalSize);

            double sizeRatio = (double) whiteAreaSize / (double) totalSize;
            LOG.debug("sizeRatio: " + sizeRatio);

            if (sizeRatio >= minFrameWhiteAreaSizeRatio) {
                LOG.debug("minFrameWhiteAreaSizeRatio: " + minFrameWhiteAreaSizeRatio);
                LOG.debug("Frame by white area size! Removing from illustrations");
                illustrations.remove(largeShape);
            }
        }
    }

    for (Shape largeShape : illustrations) {
        // Add this to large shapes if it's not a "frame"
        // large shapes are used for paragraph detection
        sourceImage.getLargeShapes().add(largeShape);
    }

    // remove shapes that are enclosed inside illustrations
    List<Shape> enclosedShapesToDelete = new ArrayList<Shape>();
    int extension = 5;
    for (Shape shape : shapes) {
        for (Shape shapeToDelete : illustrations) {
            if (shape.getLeft() >= shapeToDelete.getLeft() - extension
                    && shape.getRight() <= shapeToDelete.getRight() + extension
                    && shape.getTop() >= shapeToDelete.getTop() - extension
                    && shape.getBottom() <= shapeToDelete.getBottom() + extension) {
                LOG.debug("Enclosed shape: " + shape);
                LOG.debug(" enclosed by " + shapeToDelete);
                enclosedShapesToDelete.add(shape);
            }
        }
    }

    shapes.removeAll(largeShapes);
    shapes.removeAll(enclosedShapesToDelete);

    // remove shapes that are practically touching horizontal rules (probably segments of the rule that got split)
    extension = 3;
    List<Shape> listToTestAgainst = horizontalRules;
    for (int i = 0; i < 3; i++) {
        List<Shape> horizontalRuleSegments = new ArrayList<Shape>();
        for (Shape horizontalRule : listToTestAgainst) {
            for (Shape shape : shapes) {
                // Both horizontal bounds must hold for the shape to be near the rule. Joining
                // them with "||" is always true for a shape whose left <= right, which would
                // match every shape in the rule's vertical band regardless of horizontal distance.
                if (shape.getLeft() <= horizontalRule.getRight() + extension
                        && shape.getRight() >= horizontalRule.getLeft() - extension
                        && shape.getTop() >= horizontalRule.getTop() - extension
                        && shape.getBottom() <= horizontalRule.getBottom() + extension) {
                    LOG.debug("Horizontal rule segment: " + shape);
                    LOG.debug(" touching " + horizontalRule);
                    horizontalRuleSegments.add(shape);
                }
            }
        }
        shapes.removeAll(horizontalRuleSegments);
        // Next pass tests against the segments just removed, so removal can spread along a
        // rule that was split into several pieces.
        listToTestAgainst = horizontalRuleSegments;
        if (listToTestAgainst.isEmpty()) {
            break;
        }
    }

}