List of usage examples for org.apache.commons.math3.stat.descriptive DescriptiveStatistics getMean
public double getMean()
From source file:knop.psfj.BeadFrame.java
/** * Gets the fitting parameter mean./* w w w . jav a 2s . c om*/ * * @param parameter the parameter * @return the fitting parameter mean */ public double getFittingParameterMean(int parameter) { DescriptiveStatistics stats = new DescriptiveStatistics(); for (int a = 0; a != 3; a++) { stats.addValue(getFittingParameter(a, parameter)); } return stats.getMean(); }
From source file:info.financialecology.finance.utilities.datastruct.VersatileTimeSeries.java
/** * Computes the mean of the data points this time series. * /*from w w w . jav a2 s. c o m*/ * @return the mean (or average) of this time series */ public double operatorMean() { DescriptiveStatistics stats = new DescriptiveStatistics(); for (int i = 0; i < this.getItemCount(); i++) stats.addValue(getValue(i).doubleValue()); return stats.getMean(); }
From source file:com.caseystella.analytics.outlier.streaming.mad.SketchyMovingMADIntegrationTest.java
@Test public void runAccuracyBenchmark() throws IOException { Map<String, List<String>> benchmarks = JSONUtil.INSTANCE.load( new FileInputStream(new File(new File(benchmarkRoot), "combined_labels.json")), new TypeReference<Map<String, List<String>>>() { });/*from w ww.ja v a 2 s . com*/ Assert.assertTrue(benchmarks.size() > 0); Map<ConfusionMatrix.ConfusionEntry, Long> overallConfusionMatrix = new HashMap<>(); DescriptiveStatistics globalExpectedScores = new DescriptiveStatistics(); long total = 0; for (Map.Entry<String, List<String>> kv : benchmarks.entrySet()) { File dataFile = new File(new File(benchmarkRoot), kv.getKey()); File plotFile = new File(new File(benchmarkRoot), kv.getKey() + ".dat"); Assert.assertTrue(dataFile.exists()); Set<Long> expectedOutliers = Sets.newHashSet(Iterables.transform(kv.getValue(), STR_TO_TS)); OutlierRunner runner = new OutlierRunner(outlierConfig, extractorConfigStr); final long[] numObservations = { 0L }; final long[] lastTimestamp = { Long.MIN_VALUE }; final DescriptiveStatistics timeDiffStats = new DescriptiveStatistics(); final Map<Long, Outlier> outlierMap = new HashMap<>(); final PrintWriter pw = new PrintWriter(plotFile); List<Outlier> outliers = runner.run(dataFile, 1, EnumSet.of(Severity.SEVERE_OUTLIER), new Function<Map.Entry<DataPoint, Outlier>, Void>() { @Nullable @Override public Void apply(@Nullable Map.Entry<DataPoint, Outlier> kv) { DataPoint dataPoint = kv.getKey(); Outlier outlier = kv.getValue(); pw.println(dataPoint.getTimestamp() + " " + outlier.getDataPoint().getValue() + " " + ((outlier.getSeverity() == Severity.SEVERE_OUTLIER) ? 
"outlier" : "normal")); outlierMap.put(dataPoint.getTimestamp(), outlier); numObservations[0] += 1; if (lastTimestamp[0] != Long.MIN_VALUE) { timeDiffStats.addValue(dataPoint.getTimestamp() - lastTimestamp[0]); } lastTimestamp[0] = dataPoint.getTimestamp(); return null; } }); pw.close(); total += numObservations[0]; Set<Long> calculatedOutliers = Sets .newHashSet(Iterables.transform(outliers, OutlierRunner.OUTLIER_TO_TS)); double stdDevDiff = Math.sqrt(timeDiffStats.getVariance()); System.out.println("Running data from " + kv.getKey() + " - E[time delta]: " + ConfusionMatrix.timeConversion((long) timeDiffStats.getMean()) + ", StdDev[time delta]: " + ConfusionMatrix.timeConversion((long) stdDevDiff) + " mean: " + runner.getMean()); Map<ConfusionMatrix.ConfusionEntry, Long> confusionMatrix = ConfusionMatrix.getConfusionMatrix( expectedOutliers, calculatedOutliers, numObservations[0], (long) timeDiffStats.getMean(), 3 //stdDevDiff > 30000?0:3 , outlierMap, globalExpectedScores); ConfusionMatrix.printConfusionMatrix(confusionMatrix); overallConfusionMatrix = ConfusionMatrix.merge(overallConfusionMatrix, confusionMatrix); } System.out.println("Really ran " + total); ConfusionMatrix.printConfusionMatrix(overallConfusionMatrix); ConfusionMatrix.printStats("Global Expected Outlier Scores", globalExpectedScores); }
From source file:com.joliciel.talismane.extensions.corpus.CorpusStatistics.java
@Override public void onNextParseConfiguration(ParseConfiguration parseConfiguration, Writer writer) { sentenceCount++;/*from ww w. java 2s.co m*/ sentenceLengthStats.addValue(parseConfiguration.getPosTagSequence().size()); for (PosTaggedToken posTaggedToken : parseConfiguration.getPosTagSequence()) { if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) continue; Token token = posTaggedToken.getToken(); String word = token.getOriginalText(); words.add(word); if (referenceWords != null) { if (!referenceWords.contains(word)) unknownTokenCount++; } if (alphanumeric.matcher(token.getOriginalText()).find()) { String lowercase = word.toLowerCase(talismaneSession.getLocale()); lowerCaseWords.add(lowercase); alphanumericCount++; if (referenceLowercaseWords != null) { if (!referenceLowercaseWords.contains(lowercase)) unknownAlphanumericCount++; } } tokenCount++; Integer countObj = posTagCounts.get(posTaggedToken.getTag().getCode()); int count = countObj == null ? 0 : countObj.intValue(); count++; posTagCounts.put(posTaggedToken.getTag().getCode(), count); } int maxDepth = 0; DescriptiveStatistics avgSyntaxDepthForSentenceStats = new DescriptiveStatistics(); for (DependencyArc arc : parseConfiguration.getDependencies()) { Integer countObj = depLabelCounts.get(arc.getLabel()); int count = countObj == null ? 0 : countObj.intValue(); count++; depLabelCounts.put(arc.getLabel(), count); totalDepCount++; if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && (arc.getLabel() == null || arc.getLabel().length() == 0)) { // do nothing for unattached stuff (e.g. 
punctuation) } else if (arc.getLabel().equals("ponct")) { // do nothing for punctuation } else { int depth = 0; DependencyArc theArc = arc; while (theArc != null && !theArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)) { theArc = parseConfiguration.getGoverningDependency(theArc.getHead()); depth++; } if (depth > maxDepth) maxDepth = depth; syntaxDepthStats.addValue(depth); avgSyntaxDepthForSentenceStats.addValue(depth); int distance = Math .abs(arc.getHead().getToken().getIndex() - arc.getDependent().getToken().getIndex()); syntaxDistanceStats.addValue(distance); } maxSyntaxDepthStats.addValue(maxDepth); if (avgSyntaxDepthForSentenceStats.getN() > 0) avgSyntaxDepthStats.addValue(avgSyntaxDepthForSentenceStats.getMean()); } // we cheat a little bit by only allowing each arc to count once // there could be a situation where there are two independent non-projective arcs // crossing the same mother arc, but we prefer here to underestimate, // as this phenomenon is quite rare. Set<DependencyArc> nonProjectiveArcs = new HashSet<DependencyArc>(); int i = 0; for (DependencyArc arc : parseConfiguration.getDependencies()) { i++; if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && (arc.getLabel() == null || arc.getLabel().length() == 0)) continue; if (nonProjectiveArcs.contains(arc)) continue; int headIndex = arc.getHead().getToken().getIndex(); int depIndex = arc.getDependent().getToken().getIndex(); int startIndex = headIndex < depIndex ? headIndex : depIndex; int endIndex = headIndex >= depIndex ? 
headIndex : depIndex; int j = 0; for (DependencyArc otherArc : parseConfiguration.getDependencies()) { j++; if (j <= i) continue; if (otherArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && (otherArc.getLabel() == null || otherArc.getLabel().length() == 0)) continue; if (nonProjectiveArcs.contains(otherArc)) continue; int headIndex2 = otherArc.getHead().getToken().getIndex(); int depIndex2 = otherArc.getDependent().getToken().getIndex(); int startIndex2 = headIndex2 < depIndex2 ? headIndex2 : depIndex2; int endIndex2 = headIndex2 >= depIndex2 ? headIndex2 : depIndex2; boolean nonProjective = false; if (startIndex2 < startIndex && endIndex2 > startIndex && endIndex2 < endIndex) { nonProjective = true; } else if (startIndex2 > startIndex && startIndex2 < endIndex && endIndex2 > endIndex) { nonProjective = true; } if (nonProjective) { nonProjectiveArcs.add(arc); nonProjectiveArcs.add(otherArc); nonProjectiveCount++; LOG.debug("Non-projective arcs in sentence: " + parseConfiguration.getSentence().getText()); LOG.debug(arc.toString()); LOG.debug(otherArc.toString()); break; } } } }
From source file:edu.snu.leader.util.ParseableStatistics.java
/** * Called immediately after evaluation occurs. * * @param state The current state of evolution *//*from w w w . j a v a 2s . c o m*/ @Override public void postEvaluationStatistics(final EvolutionState state) { // Before we do anything, get the time long evalTime = (System.currentTimeMillis() - _evalStartTime); println("eval-time = " + evalTime, state); println("eval-time-human = " + TimeUnit.MILLISECONDS.toMinutes(evalTime) + "m " + TimeUnit.MILLISECONDS.toSeconds(evalTime) + "s", state); _evalTotalTime += evalTime; // Call the superclass impl super.postEvaluationStatistics(state); // Define the variables to prevent a lot of gc Individual bestOfGenInd = null; Individual currentInd = null; Subpopulation subPop = null; int subPopSize = 0; String prefix = null; double indFitness = 0.0d; // Get the statistics objects DescriptiveStatistics fitnessStats = new DescriptiveStatistics(); // Iterate over the sub-populations for (int i = 0; i < state.population.subpops.length; i++) { // Save some commonly accessed variables here subPop = state.population.subpops[i]; subPopSize = subPop.individuals.length; prefix = "subpop[" + _2_DIGIT_FORMATTER.format(i) + "]."; // Iterate over all the individuals in the sub-population bestOfGenInd = null; // _bestFound[i] = bestOfGenInd; // _bestFoundGen[i] = state.generation; for (int j = 0; j < subPopSize; j++) { // Get the current individual currentInd = subPop.individuals[j]; // Get the fitness statistic indFitness = ((SimpleFitness) currentInd.fitness).fitness(); fitnessStats.addValue(indFitness); // Is this individual the best found for this subpopulation // for this generation? if ((null == bestOfGenInd) || (currentInd.fitness.betterThan(bestOfGenInd.fitness))) { bestOfGenInd = currentInd; // Is it the best of the run? 
if ((_bestFound[i] == null) || (currentInd.fitness.betterThan(_bestFound[i].fitness))) { // Yup _bestFound[i] = currentInd; _bestFoundGen[i] = state.generation; } } } // Compute and log the mean values and variance of the fitness stats println(prefix + "fitness-mean = " + fitnessStats.getMean(), state); println(prefix + "fitness-variance = " + fitnessStats.getVariance(), state); println(prefix + "fitness-std-dev = " + fitnessStats.getStandardDeviation(), state); // Display the best individual's stats print(buildIndDescription(bestOfGenInd, state, true, prefix + "best-individual."), state); println(prefix + "best-individual-found-so-far.fitness = " + ((SimpleFitness) _bestFound[i].fitness).fitness(), state); println(prefix + "best-individual-found-so-far.generation = " + _bestFoundGen[i], state); } state.output.flush(); }
From source file:knop.psfj.BeadImage.java
/** * Calculate signal and noise./*from ww w . j av a 2 s .c o m*/ */ public void calculateSignalAndNoise() { if (bgMean == -1) { // ImageProcessor beadMask = getSegmentedImage().duplicate(); // bgMask.invert(); // middleImage.setMask(beadMask); DescriptiveStatistics stats = new DescriptiveStatistics(); DescriptiveStatistics aStats = new DescriptiveStatistics(); DescriptiveStatistics bStats = new DescriptiveStatistics(); for (BeadFrame bead : getBeadFrameList().getOnlyValidBeads()) { stats.addValue(bead.getMaximumIntensity()); aStats.addValue(bead.getFittingParameter(0, 0) - offset); bStats.addValue(bead.getFittingParameter(0, 1) - offset); } beadMaxIntensity = stats.getMean(); beadMaxStandardDeviation = stats.getStandardDeviation(); bgMean = aStats.getMean(); bgStdDev = aStats.getStandardDeviation(); beadMeanBParamter = bStats.getMean(); beadMeanBParameterStdDev = bStats.getStandardDeviation(); /* * beadMask.invert(); beadMask.dilate(); beadMask.dilate(); * beadMask.dilate(); beadMask.dilate(); for (double i = 0; i != 5; i += 0.5) { ImageProcessor middleImage = getMiddleImage().duplicate(); double threshold = getAutoThreshold(i); double realMean = middleImage.getStatistics().mean; double realStdDev = middleImage.getStatistics().stdDev; ImageProcessor bgMask = getSegmentationMask(middleImage, MathUtils.round(threshold)); middleImage.setMask(bgMask); dataset.addValue("i", i); dataset.addValue("threshold",threshold); dataset.addValue("bgMean", value); dataset.addValue("bgStandard",gbS) bgMean = middleImage.getStatistics().mean; bgStdDev = middleImage.getStatistics().stdDev; System.out .println(String .format( "Signal to noise : \nMean : %.0f,\nBg. Mean : %.0f,\nDev : %.0f", beadMeanIntensity, bgMean, bgStdDev)); new ImagePlus("bgMask", bgMask).show(); } /* * middleImage.setMask(beadMask); bgMean = * middleImage.getStatistics().mean; bgStdDev = * middleImage.getStatistics().stdDev; */ System.out.println( String.format("** Old Method **\nMean : %.0f,\nBg. 
Mean : %.0f,\nDev : %.0f\nBMean : %.0f\n", beadMaxIntensity, bgMean, bgStdDev, beadMeanBParamter)); } }
From source file:com.mapd.bench.Benchmark.java
String executeQuery(String sql, int expected, int iterations, int queryNum) { Connection conn = null;// ww w. ja va 2 s.co m Statement stmt = null; Long firstExecute = 0l; Long firstJdbc = 0l; Long firstIterate = 0l; DescriptiveStatistics statsExecute = new DescriptiveStatistics(); DescriptiveStatistics statsJdbc = new DescriptiveStatistics(); DescriptiveStatistics statsIterate = new DescriptiveStatistics(); DescriptiveStatistics statsTotal = new DescriptiveStatistics(); long totalTime = 0; try { //Open a connection logger.debug("Connecting to database url :" + url); conn = DriverManager.getConnection(url, iUser, iPasswd); long startTime = System.currentTimeMillis(); for (int loop = 0; loop < iterations; loop++) { //Execute a query stmt = conn.createStatement(); long timer = System.currentTimeMillis(); ResultSet rs = stmt.executeQuery(sql); long executeTime = 0; long jdbcTime = 0; // gather internal execute time for MapD as we are interested in that if (driver.equals(JDBC_DRIVER)) { executeTime = stmt.getQueryTimeout(); jdbcTime = (System.currentTimeMillis() - timer) - executeTime; } else { jdbcTime = (System.currentTimeMillis() - timer); executeTime = 0; } // this is fake to get our intenal execute time. 
logger.debug("Query Timeout/AKA internal Execution Time was " + stmt.getQueryTimeout() + " ms Elapsed time in JVM space was " + (System.currentTimeMillis() - timer) + "ms"); timer = System.currentTimeMillis(); //Extract data from result set int resultCount = 0; while (rs.next()) { Object obj = rs.getObject(1); if (obj != null && obj.equals(statsExecute)) { logger.info("Impossible"); } resultCount++; } long iterateTime = (System.currentTimeMillis() - timer); if (resultCount != expected) { logger.error("Expect " + expected + " actual " + resultCount + " for query " + sql); // don't run anymore break; } if (loop == 0) { firstJdbc = jdbcTime; firstExecute = executeTime; firstIterate = iterateTime; } else { statsJdbc.addValue(jdbcTime); statsExecute.addValue(executeTime); statsIterate.addValue(iterateTime); statsTotal.addValue(jdbcTime + executeTime + iterateTime); } //Clean-up environment rs.close(); stmt.close(); } totalTime = System.currentTimeMillis() - startTime; conn.close(); } catch (SQLException se) { //Handle errors for JDBC se.printStackTrace(); } catch (Exception e) { //Handle errors for Class.forName e.printStackTrace(); } finally { //finally block used to close resources try { if (stmt != null) { stmt.close(); } } catch (SQLException se2) { } // nothing we can do try { if (conn != null) { conn.close(); } } catch (SQLException se) { se.printStackTrace(); } //end finally try } //end try return String.format(lineDescriptor, queryNum, statsTotal.getMean(), statsTotal.getMin(), statsTotal.getMax(), statsTotal.getPercentile(85), statsExecute.getMean(), statsExecute.getMin(), statsExecute.getMax(), statsExecute.getPercentile(85), statsExecute.getPercentile(25), statsExecute.getStandardDeviation(), statsJdbc.getMean(), statsJdbc.getMin(), statsJdbc.getMax(), statsJdbc.getPercentile(85), statsIterate.getMean(), statsIterate.getMin(), statsIterate.getMax(), statsIterate.getPercentile(85), firstExecute, firstJdbc, firstIterate, iterations, totalTime, (long) 
statsTotal.getSum() + firstExecute + firstJdbc + firstIterate); }
From source file:edu.snu.leader.hierarchy.simple.DefaultReporter.java
/** * Report the final results of the simulation * * @see edu.snu.leader.hierarchy.simple.Reporter#reportFinalResults() *//* w ww . j a va2 s . c om*/ @Override public void reportFinalResults() { // Create some handy variables long firstActiveTimestep = Long.MAX_VALUE; long lastActiveTimestep = Long.MIN_VALUE; int initiatorCount = 0; // Gather some statistics DescriptiveStatistics immediateFollowerStats = new DescriptiveStatistics(); DescriptiveStatistics initiatorDistanceStats = new DescriptiveStatistics(); DescriptiveStatistics activeTimestepStats = new DescriptiveStatistics(); // Iterate through all the individuals Iterator<Individual> indIter = _simState.getAllIndividuals().iterator(); while (indIter.hasNext()) { Individual ind = indIter.next(); // Get some statistics immediateFollowerStats.addValue(ind.getImmediateFollowerCount()); initiatorDistanceStats.addValue(ind.getDistanceToInitiator()); activeTimestepStats.addValue(ind.getActiveTimestep()); // Build the prefix String prefix = "individual." 
+ ind.getID() + "."; // Log out important information _writer.println(prefix + "group-id = " + ind.getGroupID()); _writer.println(prefix + "active-timestep = " + ind.getActiveTimestep()); _writer.println(prefix + "immediate-follower-count = " + ind.getImmediateFollowerCount()); _writer.println(prefix + "total-follower-count = " + ind.getTotalFollowerCount()); _writer.println(prefix + "distance-to-initiator = " + ind.getDistanceToInitiator()); _writer.println(prefix + "location = " + ind.getLocation().getX() + " " + ind.getLocation().getY()); _writer.println(prefix + "threshold = " + ind.getThreshold()); _writer.println(prefix + "skill = " + ind.getSkill()); _writer.println(prefix + "confidence = " + ind.getConfidence()); _writer.println(prefix + "reputation = " + ind.getReputation()); _writer.println(prefix + "boldness = " + ind.getBoldness()); // Get the leader's ID, if it exists Object leaderID = ""; if (null != ind.getLeader()) { leaderID = ind.getLeader().getIndividual().getID(); } else { ++initiatorCount; } _writer.println(prefix + "leader = " + leaderID); // Build the list of neighbor ID's StringBuilder builder = new StringBuilder(); Iterator<Neighbor> neighborIter = ind.getNearestNeighbors().iterator(); while (neighborIter.hasNext()) { builder.append(neighborIter.next().getIndividual().getID()); builder.append(" "); } _writer.println(prefix + "nearest-neighbors = " + builder.toString()); // Build the list of follower ID's builder = new StringBuilder(); neighborIter = ind.getFollowers().iterator(); while (neighborIter.hasNext()) { builder.append(neighborIter.next().getIndividual().getID()); builder.append(" "); } _writer.println(prefix + "immediate-followers = " + builder.toString()); // Check the activity time if (firstActiveTimestep > ind.getActiveTimestep()) { firstActiveTimestep = ind.getActiveTimestep(); } if (lastActiveTimestep < ind.getActiveTimestep()) { lastActiveTimestep = ind.getActiveTimestep(); } _writer.println(); } // Log the simulation 
information _writer.println("simulation.first-active-timestep = " + firstActiveTimestep); _writer.println("simulation.last-active-timestep = " + lastActiveTimestep); _writer.println("simulation.initiator-count = " + initiatorCount); // Log the stats _writer.println("statistics.immediate-followers.mean = " + immediateFollowerStats.getMean()); _writer.println( "statistics.immediate-followers.std-dev = " + immediateFollowerStats.getStandardDeviation()); _writer.println("statistics.immediate-followers.min = " + immediateFollowerStats.getMin()); _writer.println("statistics.immediate-followers.max = " + immediateFollowerStats.getMax()); _writer.println("statistics.initiator-distance.mean = " + initiatorDistanceStats.getMean()); _writer.println("statistics.initiator-distance.std-dev = " + initiatorDistanceStats.getStandardDeviation()); _writer.println("statistics.initiator-distance.min = " + initiatorDistanceStats.getMin()); _writer.println("statistics.initiator-distance.max = " + initiatorDistanceStats.getMax()); _writer.println("statistics.active-timestep.mean = " + activeTimestepStats.getMean()); _writer.println("statistics.active-timestep.std-dev = " + activeTimestepStats.getStandardDeviation()); _writer.println("statistics.active-timestep.min = " + activeTimestepStats.getMin()); _writer.println("statistics.active-timestep.max = " + activeTimestepStats.getMax()); // Log out the stop time _writer.println(); _writer.println(_STATS_SPACER); _writer.println("# Finished: " + (new Date())); // Close out the writer _writer.close(); }
From source file:com.mapd.bench.BenchmarkCloud.java
String executeQuery(Connection conn1, String qid, String sql, int iterations) { Statement stmt = null;// ww w . j a v a 2 s .c o m Connection conn = getConnection(url, iUser, iPasswd); Long firstExecute = 0l; Long firstJdbc = 0l; Long firstIterate = 0l; DescriptiveStatistics statsExecute = new DescriptiveStatistics(); DescriptiveStatistics statsJdbc = new DescriptiveStatistics(); DescriptiveStatistics statsIterate = new DescriptiveStatistics(); DescriptiveStatistics statsTotal = new DescriptiveStatistics(); long totalTime = 0; int resultCount = 0; try { long startTime = System.currentTimeMillis(); for (int loop = 0; loop < iterations; loop++) { //Execute a query stmt = conn.createStatement(); long timer = System.currentTimeMillis(); if (loop == 0) { System.out.println(String.format("Query Id is %s : query is '%s'", qid, sql)); } ResultSet rs = stmt.executeQuery(sql); long executeTime = 0; long jdbcTime = 0; // gather internal execute time for MapD as we are interested in that if (driver.equals(JDBC_DRIVER)) { executeTime = stmt.getQueryTimeout(); jdbcTime = (System.currentTimeMillis() - timer) - executeTime; } else { jdbcTime = (System.currentTimeMillis() - timer); executeTime = 0; } // this is fake to get our intenal execute time. 
logger.debug("Query Timeout/AKA internal Execution Time was " + stmt.getQueryTimeout() + " ms Elapsed time in JVM space was " + (System.currentTimeMillis() - timer) + "ms"); timer = System.currentTimeMillis(); //Extract data from result set resultCount = 0; while (rs.next()) { Object obj = rs.getObject(1); if (obj != null && obj.equals(statsExecute)) { logger.info("Impossible"); } resultCount++; } long iterateTime = (System.currentTimeMillis() - timer); // if (resultCount != expected) { // logger.error("Expect " + expected + " actual " + resultCount + " for query " + sql); // // don't run anymore // break; // } if (loop == 0) { firstJdbc = jdbcTime; firstExecute = executeTime; firstIterate = iterateTime; } else { statsJdbc.addValue(jdbcTime); statsExecute.addValue(executeTime); statsIterate.addValue(iterateTime); statsTotal.addValue(jdbcTime + executeTime + iterateTime); } //Clean-up environment rs.close(); stmt.close(); } totalTime = System.currentTimeMillis() - startTime; conn.close(); } catch (SQLException se) { //Handle errors for JDBC se.printStackTrace(); System.exit(4); } catch (Exception e) { //Handle errors for Class.forName e.printStackTrace(); System.exit(3); } finally { //finally block used to close resources try { if (stmt != null) { stmt.close(); } } catch (SQLException se2) { } // nothing we can do try { if (conn != null) { conn.close(); } } catch (SQLException se) { se.printStackTrace(); System.exit(6); } //end finally try } //end try // write it to the db here as well String insertPart = String.format(insertDescriptor, this.rid, this.rTimestamp, url, this.driver, label, gpuCount, this.tableName, qid, resultCount, "", statsTotal.getMean(), statsTotal.getMin(), statsTotal.getMax(), statsTotal.getPercentile(85), statsExecute.getMean(), statsExecute.getMin(), statsExecute.getMax(), statsExecute.getPercentile(85), statsExecute.getPercentile(25), statsExecute.getStandardDeviation(), statsJdbc.getMean(), statsJdbc.getMin(), statsJdbc.getMax(), 
statsJdbc.getPercentile(85), statsIterate.getMean(), statsIterate.getMin(), statsIterate.getMax(), statsIterate.getPercentile(85), firstExecute, firstJdbc, firstIterate, iterations, totalTime, (long) statsTotal.getSum() + firstExecute + firstJdbc + firstIterate, targetDBVersion); LResult.add("Insert into results values " + insertPart); return String.format(lineDescriptor, qid, statsTotal.getMean(), statsTotal.getMin(), statsTotal.getMax(), statsTotal.getPercentile(85), statsExecute.getMean(), statsExecute.getMin(), statsExecute.getMax(), statsExecute.getPercentile(85), statsExecute.getPercentile(25), statsExecute.getStandardDeviation(), statsJdbc.getMean(), statsJdbc.getMin(), statsJdbc.getMax(), statsJdbc.getPercentile(85), statsIterate.getMean(), statsIterate.getMin(), statsIterate.getMax(), statsIterate.getPercentile(85), firstExecute, firstJdbc, firstIterate, iterations, totalTime, (long) statsTotal.getSum() + firstExecute + firstJdbc + firstIterate); }
From source file:com.screenslicer.core.scrape.type.ComparableNode.java
/**
 * Builds the integer feature vector ({@code scores}) describing {@code node}.
 *
 * <p>Every non-empty direct child is inspected with its own {@code NodeCounter} to gather
 * per-child counts, tag lists and descriptive statistics. A second {@code NodeCounter} over all
 * direct children supplies the aggregate counts. The order of the entries in {@code scores}
 * is significant and must not be changed.
 *
 * @param node the node to describe; its parent is dereferenced to count non-empty siblings
 */
public ComparableNode(final Node node) {
    this.node = node;
    final List<Node> separated = node.childNodes();

    int children = 0;
    int childBlocks = 0;
    int childFormatting = 0;
    int childContent = 0;
    int childItems = 0;
    int childDecoration = 0;
    int anchorChildren = 0;
    int textChildren = 0;
    int anchorTextChildren = 0;
    int anchorChildItems = 0;
    int textChildItems = 0;
    int anchorTextChildItems = 0;
    int itemChars = 0;
    int itemAnchorChars = 0;

    List<String> firstChildTags = null;
    final List<List<String>> orderedTags = new ArrayList<List<String>>();
    List<String> allChildTags = new ArrayList<String>();
    final ArrayList<List<String>> childTags = new ArrayList<List<String>>();
    boolean childrenConsistent = true;
    String childName = null;
    boolean childrenSame = true;
    double totalChildLength = 0d;
    final int nodeStrLen = Util.trimmedLen(node.toString());

    final DescriptiveStatistics statAnchorChars = new DescriptiveStatistics();
    final DescriptiveStatistics statAnchors = new DescriptiveStatistics();
    final DescriptiveStatistics statChars = new DescriptiveStatistics();
    final DescriptiveStatistics statDescendants = new DescriptiveStatistics();
    final DescriptiveStatistics statFields = new DescriptiveStatistics();
    final DescriptiveStatistics statLevels = new DescriptiveStatistics();
    final DescriptiveStatistics statLongestField = new DescriptiveStatistics();
    final DescriptiveStatistics statNonAnchorChars = new DescriptiveStatistics();
    final DescriptiveStatistics statTextAnchors = new DescriptiveStatistics();
    final DescriptiveStatistics statStrLen = new DescriptiveStatistics();
    final DescriptiveStatistics statItemChars = new DescriptiveStatistics();
    final DescriptiveStatistics statItemAnchorChars = new DescriptiveStatistics();

    for (Node child : separated) {
        if (Util.isEmpty(child)) {
            continue;
        }
        children++;

        final int childStrLen = Util.trimmedLen(child.toString());
        totalChildLength += childStrLen;

        final NodeCounter childCounter = new NodeCounter(child);
        final boolean hasAnchors = childCounter.anchors() > 0;
        final boolean hasFields = childCounter.fields() > 0;

        if (Util.isItem(child.nodeName())) {
            ++childItems;
            if (hasAnchors) {
                anchorChildItems++;
            }
            if (hasFields) {
                textChildItems++;
            }
            if (hasAnchors && hasFields) {
                anchorTextChildItems++;
            }
            itemChars += childCounter.chars();
            itemAnchorChars += childCounter.anchorChars();
            statItemChars.addValue(childCounter.chars());
            statItemAnchorChars.addValue(childCounter.anchorChars());
        }
        if (Util.isBlock(child.nodeName())) {
            ++childBlocks;
        }
        if (Util.isDecoration(child.nodeName())) {
            ++childDecoration;
        }
        if (Util.isFormatting(child.nodeName())) {
            ++childFormatting;
        }
        if (Util.isContent(child)) {
            ++childContent;
        }

        if (hasAnchors) {
            anchorChildren++;
        }
        if (hasFields) {
            textChildren++;
        }
        if (hasAnchors && hasFields) {
            anchorTextChildren++;
        }

        statAnchorChars.addValue(childCounter.anchorChars());
        statAnchors.addValue(childCounter.anchors());
        statChars.addValue(childCounter.chars());
        statDescendants.addValue(childCounter.descendants());
        statFields.addValue(childCounter.fields());
        statLevels.addValue(childCounter.levels());
        statLongestField.addValue(childCounter.longestField());
        statNonAnchorChars.addValue(childCounter.nonAnchorChars());
        statTextAnchors.addValue(childCounter.textAnchors());
        statStrLen.addValue(childStrLen);

        final List<String> curChildTags = childCounter.tags();
        allChildTags = Util.join(allChildTags, curChildTags);
        childTags.add(curChildTags);

        // The first non-empty child is the reference against which later children are compared.
        if (firstChildTags == null) {
            firstChildTags = curChildTags;
        } else if (childrenConsistent && !Util.isSame(firstChildTags, curChildTags)) {
            childrenConsistent = false;
        }
        if (childName == null) {
            childName = child.nodeName();
        } else if (childrenSame && !childName.equals(child.nodeName())) {
            childrenSame = false;
        }

        if (!Util.contains(childCounter.orderedTags(), orderedTags)) {
            orderedTags.add(childCounter.orderedTags());
        }
    }

    final int avgChildLength = (int) (children == 0 ? 0 : totalChildLength / (double) children);

    double avgChildDiff = 0;
    int maxChildDiff = 0;
    for (List<String> tagList : childTags) {
        final int missingTags = allChildTags.size() - tagList.size();
        avgChildDiff += missingTags;
        maxChildDiff = Math.max(maxChildDiff, missingTags);
    }
    avgChildDiff = childTags.size() == 0 ? 0 : avgChildDiff / (double) childTags.size();

    childrenConsistent = firstChildTags != null && !firstChildTags.isEmpty() && childrenConsistent;

    final NodeCounter counter = new NodeCounter(separated);

    int siblings = 0;
    for (Node sibling : node.parent().childNodes()) {
        if (!Util.isEmpty(sibling)) {
            siblings++;
        }
    }

    this.scores = new int[] {
            counter.items(),
            counter.blocks(),
            counter.decoration(),
            counter.formatting(),
            counter.content(),
            div(counter.items(), children),
            div(counter.blocks(), children),
            div(counter.decoration(), children),
            div(counter.formatting(), children),
            div(counter.content(), children),
            childItems,
            childBlocks,
            childDecoration,
            childFormatting,
            childContent,
            avgChildLength,
            counter.fields(),
            textChildItems,
            counter.images(),
            counter.anchors(),
            counter.textAnchors(),
            div(counter.chars(), Math.max(1, counter.fields())),
            div(itemChars, Math.max(1, textChildItems)),
            counter.longestField(),
            nodeStrLen,
            div(nodeStrLen, children),
            counter.anchorLen(),
            counter.chars(),
            itemChars,
            div(counter.chars(), children),
            div(itemChars, childItems),
            counter.nonAnchorChars(),
            div(counter.nonAnchorChars(), children),
            div(counter.nonAnchorChars(), childItems),
            div(counter.nonAnchorChars(), childBlocks),
            div(counter.nonAnchorChars(), childContent),
            div(counter.nonAnchorChars(), counter.anchors()),
            div(counter.nonAnchorChars(), counter.textAnchors()),
            counter.anchorChars(),
            itemAnchorChars,
            div(itemAnchorChars, anchorChildItems),
            div(counter.anchorChars(), counter.anchors()),
            div(counter.anchorChars(), counter.textAnchors()),
            div(counter.anchorChars(), children),
            counter.descendants(),
            counter.levels(),
            div(counter.descendants(), children),
            div(children, counter.levels()),
            siblings,
            children,
            maxChildDiff,
            toInt(avgChildDiff),
            toInt(childrenSame),
            toInt(childrenConsistent),
            orderedTags.size(),
            mod0(children, RESULT_GROUP_LARGE),
            mod0(children, RESULT_GROUP_SMALL),
            distance(children, RESULT_GROUP_LARGE),
            distance(children, RESULT_GROUP_SMALL),
            mod0(childItems, RESULT_GROUP_LARGE),
            mod0(childItems, RESULT_GROUP_SMALL),
            distance(childItems, RESULT_GROUP_LARGE),
            distance(childItems, RESULT_GROUP_SMALL),
            mod0(childBlocks, RESULT_GROUP_LARGE),
            mod0(childBlocks, RESULT_GROUP_SMALL),
            distance(childBlocks, RESULT_GROUP_LARGE),
            distance(childBlocks, RESULT_GROUP_SMALL),
            mod0(childContent, RESULT_GROUP_LARGE),
            mod0(childContent, RESULT_GROUP_SMALL),
            distance(childContent, RESULT_GROUP_LARGE),
            distance(childContent, RESULT_GROUP_SMALL),
            mod0(counter.anchors(), RESULT_GROUP_LARGE),
            mod0(counter.anchors(), RESULT_GROUP_SMALL),
            distance(counter.anchors(), RESULT_GROUP_LARGE),
            distance(counter.anchors(), RESULT_GROUP_SMALL),
            mod0(anchorChildItems, RESULT_GROUP_LARGE),
            mod0(anchorChildItems, RESULT_GROUP_SMALL),
            distance(anchorChildItems, RESULT_GROUP_LARGE),
            distance(anchorChildItems, RESULT_GROUP_SMALL),
            mod0(textChildItems, RESULT_GROUP_LARGE),
            mod0(textChildItems, RESULT_GROUP_SMALL),
            distance(textChildItems, RESULT_GROUP_LARGE),
            distance(textChildItems, RESULT_GROUP_SMALL),
            mod0(counter.textAnchors(), RESULT_GROUP_LARGE),
            mod0(counter.textAnchors(), RESULT_GROUP_SMALL),
            distance(counter.textAnchors(), RESULT_GROUP_LARGE),
            distance(counter.textAnchors(), RESULT_GROUP_SMALL),
            Math.abs(children - counter.anchors()),
            Math.abs(childItems - counter.anchors()),
            evenlyDivisible(children, counter.anchors()),
            evenlyDivisible(childItems, counter.anchors()),
            smallestMod(children, counter.anchors()),
            smallestMod(childItems, counter.anchors()),
            Math.abs(children - counter.textAnchors()),
            Math.abs(childItems - counter.textAnchors()),
            Math.abs(children - anchorChildren),
            Math.abs(childItems - anchorChildItems),
            Math.abs(children - textChildren),
            Math.abs(childItems - textChildItems),
            Math.abs(children - anchorTextChildren),
            Math.abs(childItems - anchorTextChildItems),
            evenlyDivisible(children, counter.textAnchors()),
            evenlyDivisible(childItems, counter.textAnchors()),
            evenlyDivisible(children, anchorChildren),
            evenlyDivisible(childItems, anchorChildItems),
            evenlyDivisible(children, textChildren),
            evenlyDivisible(childItems, textChildItems),
            evenlyDivisible(children, anchorTextChildren),
            evenlyDivisible(childItems, anchorTextChildItems),
            smallestMod(children, counter.textAnchors()),
            smallestMod(childItems, counter.textAnchors()),
            smallestMod(children, anchorChildren),
            smallestMod(childItems, anchorChildItems),
            smallestMod(children, textChildren),
            smallestMod(childItems, textChildItems),
            smallestMod(children, anchorTextChildren),
            smallestMod(childItems, anchorTextChildItems),
            Math.abs(anchorChildren - anchorChildItems),
            Math.abs(textChildren - textChildItems),
            Math.abs(anchorTextChildren - anchorTextChildItems),
            toInt(statAnchorChars.getSkewness()),
            toInt(statAnchorChars.getStandardDeviation()),
            toInt(statAnchorChars.getMean()),
            toInt(statAnchors.getSkewness()),
            toInt(statAnchors.getStandardDeviation()),
            toInt(statAnchors.getMean()),
            toInt(statChars.getSkewness()),
            toInt(statChars.getStandardDeviation()),
            toInt(statChars.getMean()),
            toInt(statDescendants.getSkewness()),
            toInt(statDescendants.getStandardDeviation()),
            toInt(statDescendants.getMean()),
            toInt(statFields.getSkewness()),
            toInt(statFields.getStandardDeviation()),
            toInt(statFields.getMean()),
            toInt(statLevels.getSkewness()),
            toInt(statLevels.getStandardDeviation()),
            toInt(statLevels.getMean()),
            toInt(statLongestField.getSkewness()),
            toInt(statLongestField.getStandardDeviation()),
            toInt(statLongestField.getMean()),
            toInt(statNonAnchorChars.getSkewness()),
            toInt(statNonAnchorChars.getStandardDeviation()),
            toInt(statNonAnchorChars.getMean()),
            toInt(statStrLen.getSkewness()),
            toInt(statStrLen.getStandardDeviation()),
            toInt(statStrLen.getMean()),
            toInt(statTextAnchors.getSkewness()),
            toInt(statTextAnchors.getStandardDeviation()),
            toInt(statTextAnchors.getMean()),
            toInt(statItemChars.getSkewness()),
            toInt(statItemChars.getStandardDeviation()),
            toInt(statItemChars.getMean()),
            toInt(statItemAnchorChars.getSkewness()),
            toInt(statItemAnchorChars.getStandardDeviation()),
            toInt(statItemAnchorChars.getMean()),
    };
}