List of usage examples for the constructor org.apache.commons.math.stat.descriptive.moment.Mean#Mean()
public Mean()
From source file:dr.evomodel.epidemiology.casetocase.CaseToCaseTreeLikelihood.java
/**
 * Computes summary statistics for the given sample.
 *
 * @param variable the sample values; must be non-null and contain no null elements
 * @return a four-element array: [0] mean, [1] median, [2] variance,
 *         [3] standard deviation (square root of the variance)
 */
public static Double[] getSummaryStatistics(Double[] variable) {
    // Unbox once up front so the commons-math evaluators can work on primitives.
    double[] values = new double[variable.length];
    int index = 0;
    for (Double value : variable) {
        values[index++] = value;
    }
    Double[] out = new Double[4];
    out[0] = new Mean().evaluate(values);
    out[1] = new Median().evaluate(values);
    out[2] = new Variance().evaluate(values);
    out[3] = Math.sqrt(out[2]);
    return out;
}
From source file:com.joliciel.jochre.graphics.ShapeImpl.java
/**
 * Returns the mean brightness over all cells of the sector grid stored for the
 * given key and measurement method, computing it on first request and caching
 * the result for subsequent calls.
 */
double getBrightnessMeanBySector(String key, SectionBrightnessMeasurementMethod measurementMethod) {
    Map<SectionBrightnessMeasurementMethod, Double> meansByMethod = this.brightnessMeanBySectorMap.get(key);
    if (meansByMethod == null) {
        meansByMethod = new HashMap<Shape.SectionBrightnessMeasurementMethod, Double>();
        this.brightnessMeanBySectorMap.put(key, meansByMethod);
    }
    Double cachedMean = meansByMethod.get(measurementMethod);
    if (cachedMean != null) {
        // Cache hit: return the previously computed value.
        return cachedMean.doubleValue();
    }
    // Cache miss: fold every row of the brightness grid into a single mean.
    Mean meanCalculator = new Mean();
    Map<SectionBrightnessMeasurementMethod, double[][]> brightnessByMethod = this.brightnessBySectorMap
            .get(key);
    double[][] brightnessGrid = brightnessByMethod.get(measurementMethod);
    for (double[] gridRow : brightnessGrid) {
        meanCalculator.incrementAll(gridRow);
    }
    double result = meanCalculator.getResult();
    meansByMethod.put(measurementMethod, result);
    return result;
}
From source file:com.joliciel.jochre.graphics.RowOfShapesImpl.java
/**
 * If there are different font-sizes in the current row, calculates separate
 * guidelines for the separate font-sizes. Assumes groups have already been
 * assigned.
 *
 * <p>Heuristic: computes each group's mean baseline/x-height ratio, detects
 * runs of groups whose ratio jumps by >= 0.15 ("big areas"), confirms those
 * runs by comparing mean shape brightness against the remaining groups, and
 * if confirmed re-assigns guidelines separately to the "big" and "little"
 * groups.
 */
@Override
public void splitByFontSize() {
    LOG.debug("splitByFontSize, " + this.toString());
    double[] meanAscenderToXHeightRatios = new double[this.getGroups().size()];
    int i = 0;
    double xHeight = this.getXHeight();
    // Shapes shorter than 70% of the x-height are ignored throughout.
    double minHeightRatio = 0.7;
    // Pass 1: per-group mean of baseline / x-height, over tall-enough shapes only.
    for (GroupOfShapes group : this.getGroups()) {
        Mean meanAscenderToXHeightRatio = new Mean();
        for (Shape shape : group.getShapes()) {
            if (((double) shape.getHeight() / xHeight) > minHeightRatio) {
                double ascenderToXHeightRatio = ((double) shape.getBaseLine() / xHeight);
                LOG.trace("Shape " + shape.getIndex() + ": " + ascenderToXHeightRatio);
                meanAscenderToXHeightRatio.increment(ascenderToXHeightRatio);
            }
        }
        // Groups with no qualifying shapes keep ratio 0 and are skipped below.
        if (meanAscenderToXHeightRatio.getN() > 0) {
            meanAscenderToXHeightRatios[i] = meanAscenderToXHeightRatio.getResult();
            LOG.debug(group.toString() + ": " + meanAscenderToXHeightRatios[i]);
        }
        i++;
    }
    // Pass 2: scan for ratio jumps/drops >= threshold to delimit "big areas".
    double threshold = 0.15;
    LOG.debug("threshold: " + threshold);
    double lastRatio = 0;
    List<int[]> bigAreas = new ArrayList<int[]>();
    int bigAreaStart = 0;
    // State machine: -1 = undetermined, 1 = inside a big area, 0 = outside.
    int inBigArea = -1;
    for (i = 0; i < this.getGroups().size(); i++) {
        if (i > 0) {
            if (meanAscenderToXHeightRatios[i] != 0) {
                if ((inBigArea < 0 || inBigArea == 1)
                        && lastRatio - meanAscenderToXHeightRatios[i] >= threshold) {
                    // big drop: close the current big area at the previous group
                    int[] bigArea = new int[] { bigAreaStart, i - 1 };
                    bigAreas.add(bigArea);
                    LOG.debug("Adding big area " + bigArea[0] + "," + bigArea[1]);
                    inBigArea = 0;
                } else if ((inBigArea < 0 || inBigArea == 0)
                        && meanAscenderToXHeightRatios[i] - lastRatio >= threshold) {
                    // big leap: open a new big area at this group
                    bigAreaStart = i;
                    inBigArea = 1;
                }
            }
        }
        if (meanAscenderToXHeightRatios[i] != 0)
            lastRatio = meanAscenderToXHeightRatios[i];
    }
    // A big area still open at the end of the row runs to the last group.
    if (inBigArea == 1) {
        int[] bigArea = new int[] { bigAreaStart, this.getGroups().size() - 1 };
        bigAreas.add(bigArea);
        LOG.debug("Adding big area " + bigArea[0] + "," + bigArea[1]);
    }
    // Now, which of these big areas are really big enough
    if (bigAreas.size() > 0) {
        // Confirm via brightness: a genuine big-font area should be >= 1.5x as
        // bright (per shape) as the groups outside all big areas.
        double minBrightnessRatioForSplit = 1.5;
        Mean brightnessMean = new Mean();
        Mean[] meanCardinalities = new Mean[bigAreas.size()];
        for (i = 0; i < bigAreas.size(); i++) {
            meanCardinalities[i] = new Mean();
        }
        i = 0;
        for (GroupOfShapes group : this.getGroups()) {
            // Find which big area (if any) this group index falls into.
            int bigAreaIndex = -1;
            int j = 0;
            for (int[] bigArea : bigAreas) {
                if (i >= bigArea[0] && i <= bigArea[1]) {
                    bigAreaIndex = j;
                    break;
                }
                j++;
            }
            for (Shape shape : group.getShapes()) {
                if (((double) shape.getHeight() / xHeight) > minHeightRatio) {
                    if (bigAreaIndex >= 0) {
                        meanCardinalities[bigAreaIndex].increment(shape.getTotalBrightness());
                    } else {
                        brightnessMean.increment(shape.getTotalBrightness());
                    }
                }
            }
            i++;
        } // next group
        boolean[] bigAreaConfirmed = new boolean[bigAreas.size()];
        boolean hasSplit = false;
        LOG.debug("brightnessMean for small areas: " + brightnessMean.getResult());
        for (i = 0; i < bigAreas.size(); i++) {
            int[] bigArea = bigAreas.get(i);
            double ratio = meanCardinalities[i].getResult() / brightnessMean.getResult();
            LOG.debug("big area " + bigArea[0] + "," + bigArea[1]);
            LOG.debug("brightness mean: " + meanCardinalities[i].getResult());
            LOG.debug("brightness ratio: " + ratio);
            if (ratio > minBrightnessRatioForSplit) {
                // split found!
                LOG.debug("Confirmed!");
                bigAreaConfirmed[i] = true;
                hasSplit = true;
            }
        }
        List<GroupOfShapes> bigGroups = null;
        List<GroupOfShapes> littleGroups = null;
        if (hasSplit) {
            // Partition groups into big/little, with one adjustment: a
            // single-shape little group sandwiched before a big group is
            // promoted to big.
            bigGroups = new ArrayList<GroupOfShapes>();
            littleGroups = new ArrayList<GroupOfShapes>();
            i = 0;
            boolean lastGroupSingleShapeLittle = false;
            boolean lastGroupBig = false;
            GroupOfShapes lastGroup = null;
            for (GroupOfShapes group : this.getGroups()) {
                boolean singleShapeLittleGroup = false;
                int bigAreaIndex = -1;
                int j = 0;
                for (int[] bigArea : bigAreas) {
                    if (i >= bigArea[0] && i <= bigArea[1]) {
                        bigAreaIndex = j;
                        break;
                    }
                    j++;
                }
                if (bigAreaIndex >= 0 && bigAreaConfirmed[bigAreaIndex]) {
                    if (lastGroupSingleShapeLittle) {
                        // Can't keep single shape little groups on their own
                        LOG.debug("Switching last group to big: " + lastGroup.toString());
                        littleGroups.remove(littleGroups.size() - 1);
                        bigGroups.add(lastGroup);
                    }
                    LOG.debug("Adding big group " + group.toString());
                    bigGroups.add(group);
                    lastGroupBig = true;
                } else {
                    LOG.debug("Adding little group " + group.toString());
                    littleGroups.add(group);
                    if (group.getShapes().size() == 1 && lastGroupBig) {
                        singleShapeLittleGroup = true;
                    }
                    lastGroupBig = false;
                }
                lastGroupSingleShapeLittle = singleShapeLittleGroup;
                lastGroup = group;
                i++;
            } // next group
            // A real split needs at least one group on each side.
            hasSplit = bigGroups.size() > 0 && littleGroups.size() > 0;
        }
        if (hasSplit) {
            int xHeightBig = this.assignGuideLines(bigGroups);
            int xHeightLittle = this.assignGuideLines(littleGroups);
            // There may be a better way of determining which xHeight to use for the row
            // than simply based on number of groups, e.g. group width, etc.
            if (bigGroups.size() > littleGroups.size()) {
                LOG.debug("Setting xHeight to " + xHeightBig);
                this.setXHeight(xHeightBig);
            } else {
                LOG.debug("Setting xHeight to " + xHeightLittle);
                this.setXHeight(xHeightLittle);
            }
            LOG.debug("Setting xHeightMax to " + xHeightBig);
            this.setXHeightMax(xHeightBig);
        } // has split
    } // split candidate
}
From source file:com.joliciel.jochre.graphics.SegmenterImpl.java
private int getFillFactor(SourceImage sourceImage) { LOG.debug("########## getFillFactor #########"); List<Shape> sample = this.getSample(sourceImage.getRows(), 40, true); Mean mean = new Mean(); ShapeFiller shapeFiller = this.graphicsService.getShapeFiller(); for (Shape shape : sample) { LOG.debug("Shape: " + shape); int fillFactor = shapeFiller.getFillFactor(shape, sourceImage.getBlackThreshold()); LOG.debug("fillFactor: " + fillFactor); mean.increment(fillFactor);//w w w . j a v a 2 s.c om } double meanFillFactor = mean.getResult(); LOG.debug("meanFillFactor: " + meanFillFactor); int imageFillFactor = (int) Math.round(mean.getResult()); LOG.debug("imageFillFactor: " + imageFillFactor); return imageFillFactor; }
From source file:com.joliciel.jochre.graphics.SegmenterImpl.java
void groupShapesIntoWords(Set<RowOfShapes> rowCluster) { LOG.debug("Next row cluster of size " + rowCluster.size()); // group the shapes together into words Mean spaceMean = new Mean(); StandardDeviation spaceStdDev = new StandardDeviation(); int maxSpaceLog = 120; int[] spaceCounts = new int[maxSpaceLog]; List<Integer> spaces = new ArrayList<Integer>(); for (RowOfShapes row : rowCluster) { Shape previousShape = null; for (Shape shape : row.getShapes()) { if (previousShape != null) { int space = 0; if (sourceImage.isLeftToRight()) space = shape.getLeft() - previousShape.getRight(); else space = previousShape.getLeft() - shape.getRight(); LOG.trace(shape);// w w w . j a v a 2 s. co m LOG.trace("Space : " + space); if (space < maxSpaceLog && space >= 0) spaceCounts[space]++; if (space >= 0) { spaces.add(space); spaceMean.increment(space); spaceStdDev.increment(space); } } previousShape = shape; } // next shape } for (int i = 0; i < maxSpaceLog; i++) { //LOG.debug("Space count " + i + ": " + spaceCounts[i]); } double spaceMeanVal = spaceMean.getResult(); double spaceStdDevVal = spaceStdDev.getResult(); LOG.debug("Space mean: " + spaceMeanVal); LOG.debug("Space std dev: " + spaceStdDevVal); // If however there is only a single word on the row, the // standard deviation will be very low. boolean singleWord = false; if (spaceStdDevVal * 2 < spaceMeanVal) { LOG.debug("Assuming a single word per row"); singleWord = true; } // Since there should be two groups, one for letters and one for words, // the mean should be somewhere in between. We now look for the mean on the // lesser group and will use it as the basis for comparison. 
spaceMean = new Mean(); spaceStdDev = new StandardDeviation(); for (int space : spaces) { if (space < spaceMeanVal && space >= 0) { spaceMean.increment(space); spaceStdDev.increment(space); } } spaceMeanVal = spaceMean.getResult(); spaceStdDevVal = spaceStdDev.getResult(); LOG.debug("Letter space mean: " + spaceMeanVal); LOG.debug("Letter space std dev: " + spaceStdDevVal); int letterSpaceThreshold = 0; if (singleWord) letterSpaceThreshold = Integer.MAX_VALUE; else letterSpaceThreshold = (int) Math.round(spaceMeanVal + (4.0 * spaceStdDevVal)); for (RowOfShapes row : rowCluster) { LOG.debug(row.toString()); //row.getGroups().clear(); row.organiseShapesInGroups(letterSpaceThreshold); } // next row }
From source file:com.joliciel.jochre.graphics.SegmenterImpl.java
/**
 * Attempts to split overly-wide shapes (likely two letters joined by a thin
 * "bridge") within each cluster of similar-height rows.
 *
 * <p>For each row cluster it derives four parameters from the cluster itself:
 * 1) minShapeWidth — the minimum shape width to consider for a split;
 * 2) maxBridgeWidth — the maximum bridge width usable as a dividing bridge;
 * 3) minLetterWeight — the minimum pixel count for a separate letter;
 * 4) maxHorizontalOverlap — the maximum overlap between the two halves.
 * Candidates wider than the derived threshold are handed to splitShape(), and
 * successful splits replace the original shape in its row.
 *
 * @param sourceImage the image being segmented
 * @param fillFactor  image-level fill factor, used to scale the width thresholds
 */
void splitShapes(SourceImage sourceImage, int fillFactor) {
    LOG.debug("########## splitShapes #########");
    // Cluster rows into rows of a similar height
    // Once we have this, we look for any shapes that are wider than average
    // and attempt to split them by looking for any bridges that are considerable thinner
    // than the stroke thickness and yet have big pixel counts on either side.
    // These parameters are different for different font sizes
    // Therefore, we first need to group the rows on the image into clusters by height
    double imageShapeMean = sourceImage.getAverageShapeWidth();
    double maxWidthForSplit = imageShapeMean * 6.0; // avoid splitting horizontal rules!
    Set<Set<RowOfShapes>> rowClusters = sourceImage.getRowClusters();
    for (Set<RowOfShapes> rowCluster : rowClusters) {
        LOG.debug("Analysing row cluster");
        // 1) minShapeWidth: calculate the minimum shape width to be considered for splitting
        // first get the mean
        Mean meanWidth = new Mean();
        // NOTE(review): 'shapes' is populated here but never read afterwards —
        // looks like dead code; confirm before removing.
        List<Shape> shapes = new ArrayList<Shape>();
        for (RowOfShapes row : rowCluster) {
            for (Shape shape : row.getShapes()) {
                meanWidth.increment(shape.getWidth());
                shapes.add(shape);
            }
        }
        double shapeWidthMean = meanWidth.getResult();
        LOG.debug("Mean width: " + shapeWidthMean);
        meanWidth.clear();
        // Note: there is much trial and error for these numbers
        // but the general guideline is that it is easier to deal downstream
        // with bad joins than with bad splits
        // so we prefer to err on the upper side
        double fillFactorScale = 0.15 * fillFactor;
        double widthForSplittingLower = shapeWidthMean * (1.6 + fillFactorScale);
        double widthForSplittingUpper = shapeWidthMean * (2.2 + fillFactorScale);
        LOG.debug("widthForSplittingLower: " + widthForSplittingLower);
        LOG.debug("widthForSplittingUpper: " + widthForSplittingUpper);
        LOG.debug("maxWidthForSplit: " + maxWidthForSplit);
        // Collect candidate shapes: wider than the lower splitting threshold
        // but not so wide they are probably a horizontal rule.
        List<Shape> candidates = new ArrayList<Shape>();
        for (RowOfShapes row : rowCluster) {
            LOG.debug("Next row " + row.getIndex());
            for (Shape shape : row.getShapes()) {
                LOG.trace("Shape width " + shape.getWidth());
                if (shape.getWidth() > widthForSplittingLower && shape.getWidth() < maxWidthForSplit) {
                    candidates.add(shape);
                    LOG.debug("Found candidate with width " + shape.getWidth() + ": " + shape);
                }
            }
        }
        if (candidates.size() > 0) {
            // we'll take a random sampling of shapes for the next parameters
            int sampleSize = 30;
            List<Shape> sample = this.getSample(rowCluster, sampleSize, true);
            Mean meanPixelCount = new Mean();
            Vectorizer vectorizer = this.graphicsService.getVectorizer();
            List<Integer> thicknesses = new ArrayList<Integer>();
            for (Shape shape : sample) {
                BitSet bitset = shape.getBlackAndWhiteBitSet(sourceImage.getSeparationThreshold(), 0);
                meanPixelCount.increment(bitset.cardinality());
                List<LineSegment> vectors = vectorizer.vectorize(shape);
                int height = shape.getHeight();
                // NOTE(review): height / 8 is integer division, so Math.ceil is
                // a no-op here; presumably height / 8.0 was intended — confirm
                // before changing, as it alters the thickness sampling step.
                int sampleStep = (int) Math.ceil(height / 8);
                for (LineSegment vector : vectors) {
                    List<Integer> vectorThickness = vector.getLineDefinition().findArrayListThickness(shape,
                            vector.getStartX(), vector.getStartY(), vector.getLength(),
                            sourceImage.getSeparationThreshold(), 0, sampleStep);
                    thicknesses.addAll(vectorThickness);
                }
            }
            double pixelCountMean = meanPixelCount.getResult();
            // Mean stroke thickness, then re-averaged over only the values
            // below the first mean to discount thick junctions.
            Mean meanThickness = new Mean();
            for (int thickness : thicknesses) {
                meanThickness.increment(thickness);
            }
            double thicknessMean = meanThickness.getResult();
            meanThickness = new Mean();
            for (int thickness : thicknesses) {
                if (thickness < thicknessMean)
                    meanThickness.increment(thickness);
            }
            thicknessMean = meanThickness.getResult();
            LOG.debug("thicknessMean: " + thicknessMean);
            // 2) maxBridgeWidth: the maximum bridge width to use as a dividing bridge between two shapes when splitting
            double maxBridgeWidthLower = thicknessMean * 0.5;
            double maxBridgeWidthUpper = thicknessMean * 0.8;
            LOG.debug("maxBridgeWidthLower: " + maxBridgeWidthLower);
            LOG.debug("maxBridgeWidthUpper: " + maxBridgeWidthUpper);
            // 3) minLetterWeight: the minimum pixel count that can represent a separate letter when splitting
            int minLetterWeight = (int) Math.floor(pixelCountMean / 4.0);
            LOG.debug("minLetterWeight: " + minLetterWeight);
            // 4) maxHorizontalOverlap: the maximum horizontal overlap between the left-hand and right-hand shape
            int maxOverlap = (int) Math.ceil(shapeWidthMean / 8.0);
            LOG.debug("maxOverlap: " + maxOverlap);
            Map<Shape, List<Shape>> shapesToSplit = new Hashtable<Shape, List<Shape>>();
            for (Shape candidate : candidates) {
                LOG.debug("Trying to split candidate " + candidate);
                // Debug dump of the candidate's pixel grid: M = mean line,
                // B = base line, x = black pixel, o = white pixel.
                for (int y = 0; y < candidate.getHeight(); y++) {
                    String line = "";
                    if (y == candidate.getMeanLine())
                        line += "M";
                    else if (y == candidate.getBaseLine())
                        line += "B";
                    else
                        line += y;
                    for (int x = 0; x < candidate.getWidth(); x++) {
                        if (candidate.isPixelBlack(x, y, sourceImage.getBlackThreshold()))
                            line += "x";
                        else
                            line += "o";
                    }
                    LOG.debug(line);
                }
                if (candidate.getHeight() < 3.0 * maxBridgeWidthUpper) {
                    LOG.debug("Shape too narrow - probably a long dash.");
                    continue;
                }
                int maxBridgeWidth;
                if (candidate.getWidth() > widthForSplittingUpper)
                    maxBridgeWidth = (int) Math.ceil(maxBridgeWidthUpper);
                else {
                    // since many bridges are thicker than expected
                    // add a rule that the thicker the bridge is, the wider the image needs to be
                    maxBridgeWidth = (int) Math.ceil(maxBridgeWidthLower
                            + (((double) candidate.getWidth() - widthForSplittingLower)
                                    / (widthForSplittingUpper - widthForSplittingLower)
                                    * (maxBridgeWidthUpper - maxBridgeWidthLower)));
                }
                List<Shape> splitShapes = this.splitShape(candidate, sourceImage, maxBridgeWidth,
                        minLetterWeight, maxOverlap);
                if (splitShapes.size() > 1) {
                    LOG.debug("Split found");
                    for (Shape splitShape : splitShapes) {
                        splitShape.setRow(candidate.getRow());
                    }
                    shapesToSplit.put(candidate, splitShapes);
                }
            }
            // Apply the splits: swap each candidate for its pieces and reorder
            // the affected rows.
            LOG.debug("Replacing shapes with split shapes");
            List<RowOfShapes> rowsToReorder = new ArrayList<RowOfShapes>();
            for (Shape shape : shapesToSplit.keySet()) {
                List<Shape> newShapes = shapesToSplit.get(shape);
                RowOfShapes row = shape.getRow();
                row.removeShape(shape);
                row.addShapes(newShapes);
                rowsToReorder.add(row);
            }
            for (RowOfShapes row : rowsToReorder)
                row.reorderShapes();
        }
    }
    LOG.debug("splitShapes complete");
}
From source file:com.joliciel.jochre.graphics.SegmenterImpl.java
/** * Get a random sample (with replacement) of shapes on this image. * @param sourceImage/*ww w. j a v a2 s .c o m*/ * @param sampleSize * @return */ List<Shape> getSample(Collection<RowOfShapes> rows, int sampleSize, boolean bigShapesOnly) { double minShapeWidth = 0; double minShapeHeight = 0; double maxShapeWidth = Double.MAX_VALUE; double maxShapeHeight = Double.MAX_VALUE; if (bigShapesOnly) { Mean widthMean = new Mean(); Mean heightMean = new Mean(); for (RowOfShapes row : rows) { for (Shape shape : row.getShapes()) { widthMean.increment(shape.getWidth()); heightMean.increment(shape.getHeight()); } } minShapeWidth = widthMean.getResult(); minShapeHeight = heightMean.getResult(); maxShapeWidth = minShapeWidth * 2.5; maxShapeHeight = minShapeHeight * 2.5; } List<Shape> sample = new ArrayList<Shape>(sampleSize); int countBad = 0; while (sample.size() < sampleSize) { if (countBad >= 10) { minShapeWidth = 0; minShapeHeight = 0; maxShapeWidth = Double.MAX_VALUE; maxShapeHeight = Double.MAX_VALUE; } double random = Math.random(); int rowIndex = (int) Math.floor(random * rows.size()); Iterator<RowOfShapes> iRows = rows.iterator(); RowOfShapes row = null; for (int i = 0; i <= rowIndex; i++) { row = iRows.next(); } random = Math.random(); int index = (int) Math.floor(random * row.getShapes().size()); Shape shape = row.getShapes().get(index); if (shape.getWidth() > minShapeWidth && shape.getHeight() > minShapeHeight && shape.getWidth() < maxShapeWidth && shape.getHeight() < maxShapeHeight) { sample.add(shape); countBad = 0; } else { countBad++; } } return sample; }
From source file:org.apache.accumulo.core.util.Stat.java
/**
 * Creates an accumulator tracking min, max, sum, mean and standard deviation
 * of the values it receives.
 */
public Stat() {
    this.min = new Min();
    this.max = new Max();
    this.sum = new Sum();
    this.mean = new Mean();
    this.sd = new StandardDeviation();
    // Keep the individual accumulators in one array so each incoming value
    // can be fed to all of them in a single loop.
    this.stats = new StorelessUnivariateStatistic[] { this.min, this.max, this.sum, this.mean, this.sd };
}
From source file:org.apache.drill.exec.physical.impl.orderedpartitioner.TestOrderedPartitionExchange.java
/**
 * Starts two drillbits and runs a physical plan with a Mock scan, project,
 * OrderedParititionExchange, Union Exchange, and sort. The final sort is done
 * first on the partition column, and verifies that the partitions are correct,
 * in that all rows in partition 0 should come in the sort order before any row
 * in partition 1, etc. Also computes the standard deviation of the partition
 * sizes (the assertion that it is under one tenth of the mean is currently
 * commented out).
 *
 * @throws Exception on any query or drillbit failure
 */
@Test
public void twoBitTwoExchangeRun() throws Exception {
    RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
    try (Drillbit bit1 = new Drillbit(CONFIG, serviceSet);
            Drillbit bit2 = new Drillbit(CONFIG, serviceSet);
            DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator());) {
        bit1.run();
        bit2.run();
        client.connect();
        List<QueryDataBatch> results = client.runQuery(
                org.apache.drill.exec.proto.UserBitShared.QueryType.PHYSICAL,
                Files.toString(FileUtils.getResourceAsFile("/sender/ordered_exchange.json"), Charsets.UTF_8));
        int count = 0;
        List<Integer> partitionRecordCounts = Lists.newArrayList();
        for (QueryDataBatch b : results) {
            if (b.getData() != null) {
                int rows = b.getHeader().getRowCount();
                count += rows;
                RecordBatchLoader loader = new RecordBatchLoader(
                        new BootStrapContext(DrillConfig.create()).getAllocator());
                loader.load(b.getHeader().getDef(), b.getData());
                // Pull out the three columns of interest: the two sort keys
                // and the partition id.
                BigIntVector vv1 = (BigIntVector) loader.getValueAccessorById(BigIntVector.class, loader
                        .getValueVectorId(new SchemaPath("col1", ExpressionPosition.UNKNOWN)).getFieldIds())
                        .getValueVector();
                Float8Vector vv2 = (Float8Vector) loader.getValueAccessorById(Float8Vector.class, loader
                        .getValueVectorId(new SchemaPath("col2", ExpressionPosition.UNKNOWN)).getFieldIds())
                        .getValueVector();
                IntVector pVector = (IntVector) loader.getValueAccessorById(IntVector.class,
                        loader.getValueVectorId(new SchemaPath("partition", ExpressionPosition.UNKNOWN))
                                .getFieldIds())
                        .getValueVector();
                long previous1 = Long.MIN_VALUE;
                double previous2 = Double.MIN_VALUE;
                int partPrevious = -1;
                long current1 = Long.MIN_VALUE;
                double current2 = Double.MIN_VALUE;
                int partCurrent = -1;
                int partitionRecordCount = 0;
                for (int i = 0; i < rows; i++) {
                    previous1 = current1;
                    previous2 = current2;
                    partPrevious = partCurrent;
                    current1 = vv1.getAccessor().get(i);
                    current2 = vv2.getAccessor().get(i);
                    partCurrent = pVector.getAccessor().get(i);
                    // col1 ascending; ties broken by col2 descending.
                    Assert.assertTrue(current1 >= previous1);
                    if (current1 == previous1) {
                        Assert.assertTrue(current2 <= previous2);
                    }
                    if (partCurrent == partPrevious || partPrevious == -1) {
                        partitionRecordCount++;
                    } else {
                        partitionRecordCounts.add(partitionRecordCount);
                        // NOTE(review): resetting to 0 here appears to skip
                        // counting the row that triggered the partition change
                        // (1 would count it) — TODO confirm intent.
                        partitionRecordCount = 0;
                    }
                }
                partitionRecordCounts.add(partitionRecordCount);
                loader.clear();
            }
            b.release();
        }
        // Convert the per-partition counts to doubles for commons-math.
        double[] values = new double[partitionRecordCounts.size()];
        int i = 0;
        for (Integer rc : partitionRecordCounts) {
            values[i++] = rc.doubleValue();
        }
        StandardDeviation stdDev = new StandardDeviation();
        Mean mean = new Mean();
        double std = stdDev.evaluate(values);
        double m = mean.evaluate(values);
        System.out.println("mean: " + m + " std dev: " + std);
        //Assert.assertTrue(std < 0.1 * m);
        assertEquals(31000, count);
    }
}
From source file:org.beedraz.semantics_II.expression.number.real.double64.stat.DoubleArithmeticMeanBeed.java
/**
 * Creates a beed that computes the arithmetic mean of its source, backed by
 * a commons-math {@link Mean} accumulator passed to the superclass.
 *
 * @post getSource() == null;
 * @post getDouble() == null;
 * @post owner != null ? owner.registerAggregateElement(this);
 */
public DoubleArithmeticMeanBeed(AggregateBeed owner) {
    super(new Mean(), owner);
}