List of usage examples for the no-argument constructor of org.apache.commons.math.stat.descriptive.moment.StandardDeviation
public StandardDeviation()
From source file:de.tudarmstadt.ukp.dkpro.tc.mallet.report.MalletBatchCrossValidationReport.java
@Override public void execute() throws Exception { StorageService store = getContext().getStorageService(); FlexTable<String> table = FlexTable.forClass(String.class); Map<String, List<Double>> key2resultValues = new HashMap<String, List<Double>>(); for (TaskContextMetadata subcontext : getSubtasks()) { String name = BatchTask.class.getSimpleName() + "CrossValidation"; // one CV batch (which internally ran numFolds times) if (subcontext.getLabel().startsWith(name)) { Map<String, String> discriminatorsMap = store .retrieveBinary(subcontext.getId(), Task.DISCRIMINATORS_KEY, new PropertiesAdapter()) .getMap();/*from ww w. ja v a 2 s.c om*/ File eval = store.getStorageFolder(subcontext.getId(), EVAL_FILE_NAME + SUFFIX_CSV); Map<String, String> resultMap = new HashMap<String, String>(); String[][] evalMatrix = null; int i = 0; for (String line : FileUtils.readLines(eval)) { String[] tokenizedLine = StrTokenizer.getCSVInstance(line).getTokenArray(); if (evalMatrix == null) { evalMatrix = new String[FileUtils.readLines(eval).size()][tokenizedLine.length]; } evalMatrix[i] = tokenizedLine; i++; } // columns for (int j = 0; j < evalMatrix[0].length; j++) { String header = evalMatrix[0][j]; String[] vals = new String[evalMatrix.length - 1]; // rows for (int k = 1; k < evalMatrix.length; k++) { if (evalMatrix[k][j].equals("null")) { vals[k - 1] = String.valueOf(0.); } else { vals[k - 1] = evalMatrix[k][j]; } } Mean mean = new Mean(); Sum sum = new Sum(); StandardDeviation std = new StandardDeviation(); double[] dVals = new double[vals.length]; Set<String> sVals = new HashSet<String>(); for (int k = 0; k < vals.length; k++) { try { dVals[k] = Double.parseDouble(vals[k]); sVals = null; } catch (NumberFormatException e) { dVals = null; sVals.add(vals[k]); } } if (dVals != null) { if (nonAveragedResultsMeasures.contains(header)) { resultMap.put(header, String.valueOf(sum.evaluate(dVals))); } else { resultMap.put(header, String.valueOf(mean.evaluate(dVals)) + "\u00B1" + 
String.valueOf(std.evaluate(dVals))); } } else { if (sVals.size() > 1) { resultMap.put(header, "---"); } else { resultMap.put(header, vals[0]); } } } String key = getKey(discriminatorsMap); List<Double> results; if (key2resultValues.get(key) == null) { results = new ArrayList<Double>(); } else { results = key2resultValues.get(key); } key2resultValues.put(key, results); Map<String, String> values = new HashMap<String, String>(); Map<String, String> cleanedDiscriminatorsMap = new HashMap<String, String>(); for (String disc : discriminatorsMap.keySet()) { if (!ReportUtils.containsExcludePattern(disc, discriminatorsToExclude)) { cleanedDiscriminatorsMap.put(disc, discriminatorsMap.get(disc)); } } values.putAll(cleanedDiscriminatorsMap); values.putAll(resultMap); table.addRow(subcontext.getLabel(), values); } } getContext().getLoggingService().message(getContextLabel(), ReportUtils.getPerformanceOverview(table)); // Excel cannot cope with more than 255 columns if (table.getColumnIds().length <= 255) { getContext().storeBinary(EVAL_FILE_NAME + "_compact" + SUFFIX_EXCEL, table.getExcelWriter()); } getContext().storeBinary(EVAL_FILE_NAME + "_compact" + SUFFIX_CSV, table.getCsvWriter()); table.setCompact(false); // Excel cannot cope with more than 255 columns if (table.getColumnIds().length <= 255) { getContext().storeBinary(EVAL_FILE_NAME + SUFFIX_EXCEL, table.getExcelWriter()); } getContext().storeBinary(EVAL_FILE_NAME + SUFFIX_CSV, table.getCsvWriter()); // output the location of the batch evaluation folder // otherwise it might be hard for novice users to locate this File dummyFolder = store.getStorageFolder(getContext().getId(), "dummy"); // TODO can we also do this without creating and deleting the dummy folder? getContext().getLoggingService().message(getContextLabel(), "Storing detailed results in:\n" + dummyFolder.getParent() + "\n"); dummyFolder.delete(); }
From source file:de.tudarmstadt.ukp.dkpro.tc.crfsuite.CRFSuiteBatchCrossValidationReport.java
@Override public void execute() throws Exception { StorageService store = getContext().getStorageService(); FlexTable<String> table = FlexTable.forClass(String.class); Map<String, List<Double>> key2resultValues = new HashMap<String, List<Double>>(); for (TaskContextMetadata subcontext : getSubtasks()) { String name = ExperimentCrossValidation.class.getSimpleName(); // one CV batch (which internally ran numFolds times) if (subcontext.getLabel().startsWith(name)) { Map<String, String> discriminatorsMap = store .retrieveBinary(subcontext.getId(), Task.DISCRIMINATORS_KEY, new PropertiesAdapter()) .getMap();/*from w ww .j a va 2 s . c o m*/ File eval = store.getStorageFolder(subcontext.getId(), EVAL_FILE_NAME + SUFFIX_CSV); Map<String, String> resultMap = new HashMap<String, String>(); String[][] evalMatrix = null; int i = 0; for (String line : FileUtils.readLines(eval)) { String[] tokenizedLine = StrTokenizer.getCSVInstance(line).getTokenArray(); if (evalMatrix == null) { evalMatrix = new String[FileUtils.readLines(eval).size()][tokenizedLine.length]; } evalMatrix[i] = tokenizedLine; i++; } // columns for (int j = 0; j < evalMatrix[0].length; j++) { String header = evalMatrix[0][j]; String[] vals = new String[evalMatrix.length - 1]; // rows for (int k = 1; k < evalMatrix.length; k++) { if (evalMatrix[k][j].equals("null")) { vals[k - 1] = String.valueOf(0.); } else { vals[k - 1] = evalMatrix[k][j]; } } Mean mean = new Mean(); Sum sum = new Sum(); StandardDeviation std = new StandardDeviation(); double[] dVals = new double[vals.length]; Set<String> sVals = new HashSet<String>(); for (int k = 0; k < vals.length; k++) { try { dVals[k] = Double.parseDouble(vals[k]); sVals = null; } catch (NumberFormatException e) { dVals = null; sVals.add(vals[k]); } } if (dVals != null) { if (nonAveragedResultsMeasures.contains(header)) { resultMap.put(header + foldSum, String.valueOf(sum.evaluate(dVals))); } else { resultMap.put(header + foldAveraged, String.valueOf( mean.evaluate(dVals) 
+ "\u00B1" + String.valueOf(std.evaluate(dVals)))); } } else { if (sVals.size() > 1) { resultMap.put(header, "---"); } else { resultMap.put(header, vals[0]); } } } String key = getKey(discriminatorsMap); List<Double> results; if (key2resultValues.get(key) == null) { results = new ArrayList<Double>(); } else { results = key2resultValues.get(key); } key2resultValues.put(key, results); Map<String, String> values = new HashMap<String, String>(); Map<String, String> cleanedDiscriminatorsMap = new HashMap<String, String>(); for (String disc : discriminatorsMap.keySet()) { if (!ReportUtils.containsExcludePattern(disc, discriminatorsToExclude)) { cleanedDiscriminatorsMap.put(disc, discriminatorsMap.get(disc)); } } values.putAll(cleanedDiscriminatorsMap); values.putAll(resultMap); table.addRow(subcontext.getLabel(), values); } } getContext().getLoggingService().message(getContextLabel(), ReportUtils.getPerformanceOverview(table)); // Excel cannot cope with more than 255 columns if (table.getColumnIds().length <= 255) { getContext().storeBinary(EVAL_FILE_NAME + "_compact" + SUFFIX_EXCEL, table.getExcelWriter()); } getContext().storeBinary(EVAL_FILE_NAME + "_compact" + SUFFIX_CSV, table.getCsvWriter()); table.setCompact(false); // Excel cannot cope with more than 255 columns if (table.getColumnIds().length <= 255) { getContext().storeBinary(EVAL_FILE_NAME + SUFFIX_EXCEL, table.getExcelWriter()); } getContext().storeBinary(EVAL_FILE_NAME + SUFFIX_CSV, table.getCsvWriter()); // output the location of the batch evaluation folder // otherwise it might be hard for novice users to locate this File dummyFolder = store.getStorageFolder(getContext().getId(), "dummy"); // TODO can we also do this without creating and deleting the dummy folder? getContext().getLoggingService().message(getContextLabel(), "Storing detailed results in:\n" + dummyFolder.getParent() + "\n"); dummyFolder.delete(); }
From source file:cerrla.Performance.java
/** * Records performance scores using sliding windows of results. * /* ww w .j a v a 2 s . c o m*/ * @param currentEpisode * The current episode. */ public void recordPerformanceScore(int currentEpisode) { if (recentScores_.isEmpty()) return; // Transform the queues into arrays double[] vals = new double[recentScores_.size()]; int i = 0; for (Double val : recentScores_) vals[i++] = val.doubleValue(); double[] envSDs = new double[internalSDs_.size()]; i = 0; for (Double envSD : internalSDs_) envSDs[i++] = envSD.doubleValue(); Mean m = new Mean(); StandardDeviation sd = new StandardDeviation(); double mean = m.evaluate(vals); double meanDeviation = sd.evaluate(envSDs) * CONVERGENCE_PERCENT_BUFFER; Double[] details = new Double[PerformanceDetails.values().length]; details[PerformanceDetails.EPISODE.ordinal()] = Double.valueOf(currentEpisode); details[PerformanceDetails.MEAN.ordinal()] = mean; details[PerformanceDetails.SD.ordinal()] = sd.evaluate(vals); performanceDetails_.put(currentEpisode, details); // Output current means if (ProgramArgument.SYSTEM_OUTPUT.booleanValue() && !frozen_) { DecimalFormat formatter = new DecimalFormat("#0.00"); String meanString = formatter.format(mean); String sdString = formatter.format(meanDeviation); System.out.println("Average performance: " + meanString + " " + SD_SYMBOL + " " + sdString); } if (frozen_) { System.out.println(currentEpisode + ": " + details[PerformanceDetails.MEAN.ordinal()]); } }
From source file:jsprit.core.algorithm.acceptor.ExperimentalSchrimpfAcceptance.java
@Override public void informAlgorithmStarts(VehicleRoutingProblem problem, VehicleRoutingAlgorithm algorithm, Collection<VehicleRoutingProblemSolution> solutions) { reset();/*from ww w . j a va 2s . co m*/ logger.info("---------------------------------------------------------------------"); logger.info("prepare schrimpfAcceptanceFunction, i.e. determine initial threshold"); logger.info("start random-walk (see randomWalk.xml)"); double now = System.currentTimeMillis(); this.nOfTotalIterations = algorithm.getMaxIterations(); /* * randomWalk to determine standardDev */ final double[] results = new double[nOfRandomWalks]; URL resource = Resource.getAsURL("randomWalk.xml"); AlgorithmConfig algorithmConfig = new AlgorithmConfig(); new AlgorithmConfigXmlReader(algorithmConfig).read(resource); VehicleRoutingAlgorithm vra = VehicleRoutingAlgorithms.createAlgorithm(problem, algorithmConfig); vra.setMaxIterations(nOfRandomWalks); vra.getAlgorithmListeners().addListener(new IterationEndsListener() { @Override public void informIterationEnds(int iteration, VehicleRoutingProblem problem, Collection<VehicleRoutingProblemSolution> solutions) { double result = Solutions.bestOf(solutions).getCost(); // logger.info("result={}", result); results[iteration - 1] = result; } }); vra.searchSolutions(); StandardDeviation dev = new StandardDeviation(); double standardDeviation = dev.evaluate(results); initialThreshold = standardDeviation / 2; logger.info("warmup done"); logger.info("total time: {}s", ((System.currentTimeMillis() - now) / 1000.0)); logger.info("initial threshold: {}", initialThreshold); logger.info("---------------------------------------------------------------------"); }
From source file:net.sf.katta.tool.loadtest.LoadTestMasterOperation.java
@Override public void nodeOperationsComplete(MasterContext context, List<OperationResult> nodeResults) throws Exception { try {/*from ww w . j a va 2 s . c o m*/ final int queryRate = calculateCurrentQueryRate(); LOG.info("collecting results for iteration " + _currentIteration + " and query rate " + queryRate + " after " + (System.currentTimeMillis() - _currentIterationStartTime) + " ms ..."); List<LoadTestQueryResult> queryResults = new ArrayList<LoadTestQueryResult>(); for (OperationResult operationResult : nodeResults) { if (operationResult == null || operationResult.getUnhandledException() != null) { Exception rootException = null; if (operationResult != null) { rootException = operationResult.getUnhandledException(); } throw new IllegalStateException( "at least one node operation did not completed properly: " + nodeResults, rootException); } LoadTestNodeOperationResult nodeOperationResult = (LoadTestNodeOperationResult) operationResult; queryResults.addAll(nodeOperationResult.getQueryResults()); } LOG.info("Received " + queryResults.size() + " queries, expected " + queryRate * _runTime / 1000); File statisticsFile = new File(_resultDir, "load-test-log-" + _startTime + ".log"); File resultsFile = new File(_resultDir, "load-test-results-" + _startTime + ".log"); Writer statisticsWriter = new OutputStreamWriter(new FileOutputStream(statisticsFile, true)); Writer resultWriter = new OutputStreamWriter(new FileOutputStream(resultsFile, true)); if (_currentIteration == 0) { // print headers statisticsWriter.append("#queryRate \tnode \tstartTime \tendTime \telapseTime \tquery \n"); resultWriter.append( "#requestedQueryRate \tachievedQueryRate \tfiredQueries \tqueryErrors \tavarageQueryDuration \tstandardDeviation \n"); } try { StorelessUnivariateStatistic timeStandardDeviation = new StandardDeviation(); StorelessUnivariateStatistic timeMean = new Mean(); int errors = 0; for (LoadTestQueryResult result : queryResults) { long elapsedTime = result.getEndTime() > 0 ? 
result.getEndTime() - result.getStartTime() : -1; statisticsWriter.write(queryRate + "\t" + result.getNodeId() + "\t" + result.getStartTime() + "\t" + result.getEndTime() + "\t" + elapsedTime + "\t" + result.getQuery() + "\n"); if (elapsedTime != -1) { timeStandardDeviation.increment(elapsedTime); timeMean.increment(elapsedTime); } else { ++errors; } } resultWriter.write(queryRate + "\t" + ((double) queryResults.size() / (_runTime / 1000)) + "\t" + queryResults.size() + "\t" + errors + "\t" + (int) timeMean.getResult() + "\t" + (int) timeStandardDeviation.getResult() + "\n"); } catch (IOException e) { throw new IllegalStateException("Failed to write statistics data.", e); } try { LOG.info("results written to " + resultsFile.getAbsolutePath()); LOG.info("statistics written to " + statisticsFile.getAbsolutePath()); statisticsWriter.close(); resultWriter.close(); } catch (IOException e) { LOG.warn("Failed to close statistics file."); } if (queryRate + _step <= _endRate) { _currentIteration++; LOG.info("triggering next iteration " + _currentIteration); context.getMasterQueue().add(this); } else { LOG.info("finish load test in iteration " + _currentIteration + " after " + (System.currentTimeMillis() - _startTime) + " ms"); context.getProtocol().removeFlag(getName()); } } catch (Exception e) { context.getProtocol().removeFlag(getName()); } }
From source file:jCloisterZone.CarcassonneEnvironment.java
public static void main(String[] args) { int repetitions = 100; double[] scores = new double[repetitions]; RRLJCloisterClient client = new LocalCarcassonneClient("config.ini"); ServerIF server = null;// w w w . j a v a 2 s . c o m Game game = client.getGame(); Player firstPlayer = null; ArrayList<PlayerSlot> slots = new ArrayList<PlayerSlot>(); for (int r = 0; r < repetitions; r++) { client.createGame(); if (game == null) { server = new LocalCarcassonneServer(client.getGame()); PlayerSlot slot = new PlayerSlot(0, PlayerSlot.SlotType.AI, "RANDOM" + 0, client.getClientId()); slot.setAiClassName(RandomAIPlayer.class.getName()); slots.add(slot); for (int j = 1; j < Integer.parseInt(args[0]); j++) { slot = new PlayerSlot(j, PlayerSlot.SlotType.AI, "AI" + j, client.getClientId()); slot.setAiClassName(LegacyAiPlayer.class.getName()); slots.add(slot); } game = client.getGame(); } else { // Reset the UIs server.stopGame(); game.clearUserInterface(); // Clear the slots and re-add them. for (int i = 0; i < PlayerSlot.COUNT; i++) { server.updateSlot(new PlayerSlot(i), null); } } Collections.shuffle(slots); for (int i = 0; i < slots.size(); i++) { PlayerSlot slot = slots.get(i); PlayerSlot cloneSlot = new PlayerSlot(i, slot.getType(), slot.getNick(), slot.getOwner()); cloneSlot.setAiClassName(slot.getAiClassName()); server.updateSlot(cloneSlot, LegacyAiPlayer.supportedExpansions()); } server.startGame(); Phase phase = game.getPhase(); // Cycle through (probably only once) to keep the game moving. 
while (phase != null && !phase.isEntered()) { // Modifying phases to proxyless versions if (phase.getClass().equals(CreateGamePhase.class)) phase = game.getPhases().get(ProxylessCreateGamePhase.class); if (phase.getClass().equals(DrawPhase.class)) phase = game.getPhases().get(ProxylessDrawPhase.class); phase.setEntered(true); phase.enter(); phase = game.getPhase(); if (game.getTurnPlayer().getNick().equals("RANDOM0")) firstPlayer = game.getTurnPlayer(); } int score = firstPlayer.getPoints(); scores[r] = score; System.out.println(score); } Mean m = new Mean(); StandardDeviation sd = new StandardDeviation(); System.out.println("Mean: " + m.evaluate(scores) + ", SD: " + sd.evaluate(scores)); }
From source file:com.joliciel.jochre.graphics.SourceImageImpl.java
@Override public Set<Set<RowOfShapes>> getRowClusters() { if (rowClusters == null) { Mean heightMean = new Mean(); StandardDeviation heightStdDev = new StandardDeviation(); List<double[]> rowHeights = new ArrayList<double[]>(this.getRows().size()); for (RowOfShapes row : this.getRows()) { Shape shape = row.getShapes().iterator().next(); int height = shape.getBaseLine() - shape.getMeanLine(); rowHeights.add(new double[] { height }); heightMean.increment(height); heightStdDev.increment(height); }//from w w w. ja v a 2s. co m double stdDevHeight = heightStdDev.getResult(); List<RowOfShapes> rows = new ArrayList<RowOfShapes>(this.getRows()); DBSCANClusterer<RowOfShapes> clusterer = new DBSCANClusterer<RowOfShapes>(rows, rowHeights); rowClusters = clusterer.cluster(stdDevHeight, 2, true); LOG.debug("Found " + rowClusters.size() + " row clusters."); } return rowClusters; }
From source file:com.joliciel.jochre.graphics.SegmenterImpl.java
/** * Split rows if they're particularly high, and contain considerable white space in the middle. * Shapes causing the join will be removed if too high, or attached to the closest row otherwise. * @param sourceImage//w ww .jav a 2 s .c om * @param regressions * @return */ void splitRows(SourceImage sourceImage) { LOG.debug("########## splitRows #########"); // Calculate the min row height to be considered for splitting double minHeightForSplit = sourceImage.getAverageShapeHeight(); LOG.debug("minHeightForSplit: " + minHeightForSplit); double slopeMean = sourceImage.getMeanHorizontalSlope(); List<RowOfShapes> candidateRows = new ArrayList<RowOfShapes>(); for (RowOfShapes row : sourceImage.getRows()) { if (row.getRight() == row.getLeft()) continue; int height = row.getBottom() - row.getTop(); if (height >= minHeightForSplit) { LOG.debug("Adding candidate " + row.toString()); candidateRows.add(row); } } // For each row to be considered for splitting, see if there are lines of white space inside it. 
Hashtable<RowOfShapes, List<RowOfShapes>> splitRows = new Hashtable<RowOfShapes, List<RowOfShapes>>(); for (RowOfShapes row : candidateRows) { SimpleRegression regression = new SimpleRegression(); // y = intercept + slope * x LOG.debug("Left point: (" + row.getLeft() + " , " + row.getTop() + ")"); regression.addData(row.getLeft(), row.getTop()); double rightHandY = row.getTop() + ((double) (row.getRight() - row.getLeft()) * slopeMean); LOG.debug("Right point: (" + row.getRight() + " , " + rightHandY + ")"); regression.addData(row.getRight(), rightHandY); int yDelta = (int) Math.ceil(Math.abs(rightHandY - (double) row.getTop())); int yInterval = yDelta + (row.getBottom() - row.getTop() + 1) + yDelta; LOG.debug("yDelta: " + yDelta); LOG.debug("yInterval: " + yInterval); // let's get pixel counts shape by shape, and leave out the rest (in case rows overlap vertically) int[] pixelCounts = new int[yInterval]; for (Shape shape : row.getShapes()) { LOG.trace("Shape " + shape); int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft())); LOG.trace("yDeltaAtLeft: " + yDeltaAtLeft); // the shape offset + the offset between the regression line and the row top // + the delta we left at the start in case the line slopes upwards to the right int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta; LOG.trace("topIndex: (" + shape.getTop() + " - " + row.getTop() + ") + (" + row.getTop() + " - " + yDeltaAtLeft + ") + " + yDelta + " = " + topIndex); for (int x = 0; x < shape.getWidth(); x++) { for (int y = 0; y < shape.getHeight(); y++) { if (shape.isPixelBlack(x, y, sourceImage.getBlackThreshold())) { pixelCounts[topIndex + y]++; } } } } Mean pixelCountMean = new Mean(); StandardDeviation pixelCountStdDev = new StandardDeviation(); for (int i = 0; i < yInterval; i++) { LOG.debug("Pixel count " + i + ": " + pixelCounts[i]); pixelCountMean.increment(pixelCounts[i]); pixelCountStdDev.increment(pixelCounts[i]); } LOG.debug("pixel count mean: 
" + pixelCountMean.getResult() + ", std dev: " + pixelCountStdDev.getResult()); // If there's a split required, we're going to go considerably above and below the mean several times double lowThreshold = pixelCountMean.getResult() / 2.0; double highThreshold = pixelCountMean.getResult() * 2.0; boolean inRow = false; List<Integer> switches = new ArrayList<Integer>(); for (int i = 0; i < yInterval; i++) { if (!inRow && pixelCounts[i] > highThreshold) { LOG.debug("In row at " + i + ", pixel count " + pixelCounts[i]); inRow = true; switches.add(i); } else if (inRow && pixelCounts[i] < lowThreshold) { LOG.debug("Out of row at " + i + ", pixel count " + pixelCounts[i]); inRow = false; switches.add(i); } } if (switches.size() > 2) { // we have more than one row List<Integer> rowSeparations = new ArrayList<Integer>(); // find the row separators for (int switchIndex = 1; switchIndex < switches.size() - 2; switchIndex = switchIndex + 2) { int outOfRow = switches.get(switchIndex); int intoRow = switches.get(switchIndex + 1); int minPixelCount = (int) Math.ceil(highThreshold); int minIndex = -1; // find the row with the lowest pixel count for (int i = outOfRow; i <= intoRow; i++) { if (pixelCounts[i] < minPixelCount) { minPixelCount = pixelCounts[i]; minIndex = i; } } rowSeparations.add(minIndex); } // separate the shapes among the rows List<RowOfShapes> newRows = new ArrayList<RowOfShapes>(rowSeparations.size() + 1); for (int i = 0; i <= rowSeparations.size(); i++) { newRows.add(graphicsService.getEmptyRow(sourceImage)); } // add a separator at the beginning and end rowSeparations.add(0, 0); rowSeparations.add(yInterval + 1); for (Shape shape : row.getShapes()) { int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft())); int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta; int firstSepAfterShapeBottom = rowSeparations.size(); int lastSepBeforeShapeTop = -1; for (int i = rowSeparations.size() - 1; i >= 0; i--) { int 
rowSeparation = rowSeparations.get(i); if (rowSeparation <= topIndex) { lastSepBeforeShapeTop = i; break; } } for (int i = 0; i < rowSeparations.size(); i++) { int rowSeparation = rowSeparations.get(i); if (rowSeparation >= topIndex + shape.getHeight()) { firstSepAfterShapeBottom = i; break; } } if (lastSepBeforeShapeTop == firstSepAfterShapeBottom - 1) { // shape clearly belongs to one row RowOfShapes newRow = newRows.get(lastSepBeforeShapeTop); newRow.addShape(shape); } else { // is the shape much closer to one row than another? // if yes, add it to then add it to this row int[] yPixelsPerRow = new int[newRows.size()]; for (int i = 0; i < newRows.size(); i++) { int separatorTop = rowSeparations.get(i); int separatorBottom = rowSeparations.get(i + 1); int top = topIndex < separatorTop ? separatorTop : topIndex; int bottom = topIndex + shape.getHeight() < separatorBottom ? topIndex + shape.getHeight() : separatorBottom; yPixelsPerRow[i] = bottom - top; } int pixelsInMaxRow = 0; int maxPixelRowIndex = -1; for (int i = 0; i < newRows.size(); i++) { if (yPixelsPerRow[i] > pixelsInMaxRow) { pixelsInMaxRow = yPixelsPerRow[i]; maxPixelRowIndex = i; } } double minPercentage = 0.8; if (((double) pixelsInMaxRow / (double) shape.getHeight()) >= minPercentage) { RowOfShapes newRow = newRows.get(maxPixelRowIndex); newRow.addShape(shape); } else { // otherwise, the shape needs to be got rid of // as it's causing massive confusion // do this by simply not adding it anywhere } } // is the shape in one row exactly? } // next shape splitRows.put(row, newRows); } // do we have more than one row? } // next row for (RowOfShapes row : splitRows.keySet()) { List<RowOfShapes> newRows = splitRows.get(row); sourceImage.replaceRow(row, newRows); } }
From source file:ch.ethz.bsse.quasirecomb.model.Preprocessing.java
private static void computeInsertDist(Read[] reads) { List<Integer> l = new LinkedList<>(); StringBuilder insertSB = new StringBuilder(); int x = 0;//from www . ja v a 2 s . c o m for (Read r : reads) { if (r.isPaired()) { l.add(r.getCrickBegin() - r.getWatsonEnd()); // inserts[x++] = ; Globals.getINSTANCE().incPAIRED(); } else if (r.isMerged()) { Globals.getINSTANCE().incMERGED(); } for (int i = 0; i < r.getCount(); i++) { insertSB.append(r.getInsertion()).append("\n"); } } double[] inserts = new double[Globals.getINSTANCE().getPAIRED_COUNT()]; for (Integer i : l) { inserts[x++] = i; } StatusUpdate.getINSTANCE().println("Insert size\t" + Math.round((new Mean().evaluate(inserts)) * 10) / 10 + " (" + Math.round(new StandardDeviation().evaluate(inserts) * 10) / 10 + ")"); Utils.saveFile(Globals.getINSTANCE().getSAVEPATH() + "support" + File.separator + "insertSize.txt", insertSB.toString()); }
From source file:com.joliciel.jochre.graphics.SegmenterImpl.java
void removeOversizedShapes(List<Shape> shapes) { LOG.debug("########## removeOversizedShapes #########"); Mean shapeHeightMean = new Mean(); Mean shapeWidthMean = new Mean(); for (Shape shape : shapes) { shapeHeightMean.increment(shape.getHeight()); shapeWidthMean.increment(shape.getWidth()); }//from ww w . ja v a 2s. c o m double heightMean = shapeHeightMean.getResult(); double widthMean = shapeWidthMean.getResult(); LOG.debug("heightMean: " + heightMean); LOG.debug("widthMean: " + widthMean); shapeHeightMean = new Mean(); shapeWidthMean = new Mean(); StandardDeviation shapeHeightStdDev = new StandardDeviation(); for (Shape shape : shapes) { if (shape.getHeight() > heightMean && shape.getHeight() < (heightMean * 2.0) && shape.getWidth() > widthMean && shape.getWidth() < (widthMean * 2.0)) { shapeHeightMean.increment(shape.getHeight()); shapeHeightStdDev.increment(shape.getHeight()); shapeWidthMean.increment(shape.getWidth()); } } heightMean = shapeHeightMean.getResult(); widthMean = shapeWidthMean.getResult(); LOG.debug("average shape heightMean: " + heightMean); LOG.debug("average shape widthMean: " + widthMean); double minHeightBigShape = heightMean * 6; double minWidthWideShape = widthMean * 6; double minHeightWideShape = heightMean * 1.5; double minHeightTallShape = heightMean * 2.5; double maxWidthTallShape = widthMean / 2; LOG.debug("minHeightBigShape: " + minHeightBigShape); LOG.debug("minWidthWideShape: " + minWidthWideShape); LOG.debug("minHeightWideShape: " + minHeightWideShape); LOG.debug("minHeightTallShape: " + minHeightTallShape); LOG.debug("maxWidthTallShape: " + maxWidthTallShape); List<Shape> largeShapes = new ArrayList<Shape>(); List<Shape> horizontalRules = new ArrayList<Shape>(); for (Shape shape : shapes) { if (shape.getHeight() > minHeightBigShape) { LOG.debug("Removing " + shape + " (height)"); largeShapes.add(shape); } else if (shape.getWidth() > minWidthWideShape && shape.getHeight() > minHeightWideShape) { // we don't want to remove 
horizontal bars, but we do want to remove other shapes. // why not? I suppose horizontal bars are easily represented as characters? LOG.debug("Removing " + shape + " (width)"); largeShapes.add(shape); } else if (shape.getWidth() > minWidthWideShape) { // ok, we will remove horizontal rules after all LOG.debug("Removing " + shape + " (horizontal rule)"); largeShapes.add(shape); horizontalRules.add(shape); } else if (shape.getWidth() <= maxWidthTallShape && shape.getHeight() > minHeightTallShape) { LOG.debug("Removing " + shape + " (narrow)"); largeShapes.add(shape); } } // Only want to remove enclosed shapes if the large shape isn't a frame/grid // A) first reduce the shape by 5 percent and see it's cardinality reduces vastly (in which case it's a frame) // if so, don't remove enclosed shapes // B) next, detect white rectangles within the shape - if they're big enough, don't remove enclosed shapes LOG.debug("Are large shapes frames or illustrations?"); double maxFrameCardinalityRatio = 0.5; double minFrameWhiteAreaSizeRatio = 0.9; List<Shape> illustrations = new ArrayList<Shape>(largeShapes); for (Shape largeShape : largeShapes) { LOG.debug(largeShape.toString()); int xOrigin = largeShape.getStartingPoint()[0] - largeShape.getLeft(); int yOrigin = largeShape.getStartingPoint()[1] - largeShape.getTop(); Shape dummyShape = graphicsService.getDot(sourceImage, xOrigin, yOrigin); // We want to fill up a mirror of the contiguous pixels within this shape, // which is what we'll use for further analysis to know // if it's a frame or not. 
WritableImageGrid mirror = graphicsService.getEmptyMirror(largeShape); this.findContiguousPixels(largeShape, mirror, dummyShape, xOrigin, yOrigin, sourceImage.getSeparationThreshold()); int adjustedLeft = (int) Math.round((double) mirror.getWidth() * 0.05); int adjustedRight = (int) Math.round((double) mirror.getWidth() * 0.95); int adjustedTop = (int) Math.round((double) mirror.getHeight() * 0.05); int adjustedBottom = (int) Math.round((double) mirror.getHeight() * 0.95); int cardinality = 0; int innerCardinality = 0; for (int x = 0; x < mirror.getWidth(); x++) { for (int y = 0; y < mirror.getHeight(); y++) { if (mirror.getPixel(x, y) > 0) { cardinality++; if (x >= adjustedLeft && x <= adjustedRight && y >= adjustedTop && y <= adjustedBottom) innerCardinality++; } } } LOG.debug("cardinality: " + cardinality); LOG.debug("innerCardinality: " + innerCardinality); double ratio = (double) innerCardinality / (double) cardinality; LOG.debug("ratio: " + ratio); if (ratio <= maxFrameCardinalityRatio) { LOG.debug("maxFrameCardinalityRatio: " + maxFrameCardinalityRatio); LOG.debug("Frame by cardinality! Removing from illustrations"); illustrations.remove(largeShape); } else { // Now, it could still be a grid // to find this out we need to detect white areas inside the shape. 
WhiteAreaFinder whiteAreaFinder = new WhiteAreaFinder(); double minWhiteAreaWidth = widthMean * 10; double minWhiteAreaHeight = heightMean * 4; List<Rectangle> whiteAreas = whiteAreaFinder.getWhiteAreas(mirror, 0, 0, 0, mirror.getWidth() - 1, mirror.getHeight() - 1, minWhiteAreaWidth, minWhiteAreaHeight); int whiteAreaSize = 0; for (Rectangle whiteArea : whiteAreas) { whiteAreaSize += (whiteArea.getWidth() * whiteArea.getHeight()); } int totalSize = mirror.getWidth() * mirror.getHeight(); LOG.debug("whiteAreaSize: " + whiteAreaSize); LOG.debug("totalSize: " + totalSize); double sizeRatio = (double) whiteAreaSize / (double) totalSize; LOG.debug("sizeRatio: " + sizeRatio); if (sizeRatio >= minFrameWhiteAreaSizeRatio) { LOG.debug("minFrameWhiteAreaSizeRatio: " + minFrameWhiteAreaSizeRatio); LOG.debug("Frame by white area size! Removing from illustrations"); illustrations.remove(largeShape); } } } for (Shape largeShape : illustrations) { // Add this to large shapes if it's not a "frame" // large shapes are used for paragraph detection sourceImage.getLargeShapes().add(largeShape); } // remove shapes that are enclosed inside illustrations List<Shape> enclosedShapesToDelete = new ArrayList<Shape>(); int extension = 5; for (Shape shape : shapes) { for (Shape shapeToDelete : illustrations) { if (shape.getLeft() >= shapeToDelete.getLeft() - extension && shape.getRight() <= shapeToDelete.getRight() + extension && shape.getTop() >= shapeToDelete.getTop() - extension && shape.getBottom() <= shapeToDelete.getBottom() + extension) { LOG.debug("Enclosed shape: " + shape); LOG.debug(" enclosed by " + shapeToDelete); enclosedShapesToDelete.add(shape); } } } shapes.removeAll(largeShapes); shapes.removeAll(enclosedShapesToDelete); // remove shapes that are practically touching horizontal rules (probably segments of the rule that got split) extension = 3; List<Shape> listToTestAgainst = horizontalRules; for (int i = 0; i < 3; i++) { List<Shape> horizontalRuleSegments = new 
ArrayList<Shape>(); for (Shape horizontalRule : listToTestAgainst) { for (Shape shape : shapes) { if ((shape.getLeft() <= horizontalRule.getRight() + extension || shape.getRight() >= horizontalRule.getLeft() - extension) && shape.getTop() >= horizontalRule.getTop() - extension && shape.getBottom() <= horizontalRule.getBottom() + extension) { LOG.debug("Horizontal rule segment: " + shape); LOG.debug(" touching " + horizontalRule); horizontalRuleSegments.add(shape); enclosedShapesToDelete.add(shape); } } } shapes.removeAll(horizontalRuleSegments); listToTestAgainst = horizontalRuleSegments; if (listToTestAgainst.size() == 0) break; } }