List of usage examples for the org.apache.hadoop.io.LongWritable.get() method
public long get()
From source file:com.caseystella.analytics.outlier.streaming.mad.ConfusionMatrix.java
License:Apache License
public static Map<ConfusionEntry, Long> getConfusionMatrix(Set<Long> expectedOutliers, Set<Long> computedOutliers, LongWritable numObservations, long meanDiffBetweenTs, int timeBounds, Map<Long, Outlier> outlierMap, DescriptiveStatistics globalExpectedOutlierScoreStats) { Map<ConfusionEntry, Long> ret = new HashMap<>(); for (ResultType r : ResultType.values()) { for (ResultType s : ResultType.values()) { ret.put(new ConfusionEntry(r, s), 0L); }/*from w w w .j a v a2s . c o m*/ } int unionSize = 0; DescriptiveStatistics expectedOutlierScoreStats = new DescriptiveStatistics(); for (Long expectedOutlier : expectedOutliers) { Outlier o = outlierMap.get(expectedOutlier); if (o.getScore() != null) { expectedOutlierScoreStats.addValue(o.getScore()); globalExpectedOutlierScoreStats.addValue(o.getScore()); } if (setContains(computedOutliers, expectedOutlier, meanDiffBetweenTs, timeBounds)) { ConfusionEntry entry = new ConfusionEntry(ResultType.OUTLIER, ResultType.OUTLIER); ConfusionEntry.increment(entry, ret); unionSize++; } else { ConfusionEntry entry = new ConfusionEntry(ResultType.NON_OUTLIER, ResultType.OUTLIER); long closest = closest(expectedOutlier, computedOutliers); long delta = Math.abs(expectedOutlier - closest); if (closest != Long.MAX_VALUE) { System.out.println("Missed an outlier (" + expectedOutlier + ") wasn't in computed outliers (" + o + "), closest point is " + closest + " which is " + timeConversion(delta) + "away. - E[delta t] " + timeConversion(meanDiffBetweenTs) + ""); } else { System.out.println("Missed an outlier (" + expectedOutlier + ") wasn't in computed outliers (" + o + "), which is empty. 
- E[delta t] " + timeConversion(meanDiffBetweenTs) + ""); } ConfusionEntry.increment(entry, ret); unionSize++; } } printStats("Expected Outlier Score Stats", expectedOutlierScoreStats); DescriptiveStatistics computedOutlierScoreStats = new DescriptiveStatistics(); for (Long computedOutlier : computedOutliers) { if (!setContains(expectedOutliers, computedOutlier, meanDiffBetweenTs, timeBounds)) { Outlier o = outlierMap.get(computedOutlier); if (o.getScore() != null) { computedOutlierScoreStats.addValue(o.getScore()); } ConfusionEntry entry = new ConfusionEntry(ResultType.OUTLIER, ResultType.NON_OUTLIER); ConfusionEntry.increment(entry, ret); unionSize++; } } printStats("Computed Outlier Scores", computedOutlierScoreStats); ret.put(new ConfusionEntry(ResultType.NON_OUTLIER, ResultType.NON_OUTLIER), numObservations.get() - unionSize); Assert.assertEquals(numObservations.get(), getTotalNum(ret)); return ret; }
From source file:com.caseystella.analytics.outlier.streaming.mad.SketchyMovingMADIntegrationTest.java
License:Apache License
@Test public void runAccuracyBenchmark() throws IOException { Map<String, List<String>> benchmarks = JSONUtil.INSTANCE.load( new FileInputStream(new File(new File(benchmarkRoot), "combined_labels.json")), new TypeReference<Map<String, List<String>>>() { });/*from www. j a va2s. c o m*/ Assert.assertTrue(benchmarks.size() > 0); Map<ConfusionMatrix.ConfusionEntry, Long> overallConfusionMatrix = new HashMap<>(); DescriptiveStatistics globalExpectedScores = new DescriptiveStatistics(); long total = 0; for (Map.Entry<String, List<String>> kv : benchmarks.entrySet()) { File dataFile = new File(new File(benchmarkRoot), kv.getKey()); File plotFile = new File(new File(benchmarkRoot), kv.getKey() + ".dat"); Assert.assertTrue(dataFile.exists()); Set<Long> expectedOutliers = Sets.newHashSet(Iterables.transform(kv.getValue(), STR_TO_TS)); OutlierRunner runner = new OutlierRunner(outlierConfig, extractorConfigStr); final LongWritable numObservations = new LongWritable(0); final LongWritable lastTimestamp = new LongWritable(Long.MIN_VALUE); final DescriptiveStatistics timeDiffStats = new DescriptiveStatistics(); final Map<Long, Outlier> outlierMap = new HashMap<>(); final PrintWriter pw = new PrintWriter(plotFile); List<Outlier> outliers = runner.run(dataFile, 1, EnumSet.of(Severity.SEVERE_OUTLIER), new Function<Map.Entry<DataPoint, Outlier>, Void>() { @Nullable @Override public Void apply(@Nullable Map.Entry<DataPoint, Outlier> kv) { DataPoint dataPoint = kv.getKey(); Outlier outlier = kv.getValue(); pw.println(dataPoint.getTimestamp() + " " + outlier.getDataPoint().getValue() + " " + ((outlier.getSeverity() == Severity.SEVERE_OUTLIER) ? 
"outlier" : "normal")); outlierMap.put(dataPoint.getTimestamp(), outlier); numObservations.set(numObservations.get() + 1); if (lastTimestamp.get() != Long.MIN_VALUE) { timeDiffStats.addValue(dataPoint.getTimestamp() - lastTimestamp.get()); } lastTimestamp.set(dataPoint.getTimestamp()); return null; } }); pw.close(); total += numObservations.get(); Set<Long> calculatedOutliers = Sets .newHashSet(Iterables.transform(outliers, OutlierRunner.OUTLIER_TO_TS)); double stdDevDiff = Math.sqrt(timeDiffStats.getVariance()); System.out.println("Running data from " + kv.getKey() + " - E[time delta]: " + ConfusionMatrix.timeConversion((long) timeDiffStats.getMean()) + ", StdDev[time delta]: " + ConfusionMatrix.timeConversion((long) stdDevDiff) + " mean: " + runner.getMean()); Map<ConfusionMatrix.ConfusionEntry, Long> confusionMatrix = ConfusionMatrix.getConfusionMatrix( expectedOutliers, calculatedOutliers, numObservations, (long) timeDiffStats.getMean(), 3 //stdDevDiff > 30000?0:3 , outlierMap, globalExpectedScores); ConfusionMatrix.printConfusionMatrix(confusionMatrix); overallConfusionMatrix = ConfusionMatrix.merge(overallConfusionMatrix, confusionMatrix); } System.out.println("Really ran " + total); ConfusionMatrix.printConfusionMatrix(overallConfusionMatrix); ConfusionMatrix.printStats("Global Expected Outlier Scores", globalExpectedScores); }
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelCountingReducer.java
License:Apache License
@Override protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { long sum = 0; for (LongWritable value : values) { context.setStatus("Parallel Counting Reducer :" + key); sum += value.get(); }// ww w .ja v a2 s. c o m context.setStatus("Parallel Counting Reducer: " + key + " => " + sum); context.write(key, new LongWritable(sum)); }
From source file:com.cloudera.impala.hive.executor.TestUdf.java
License:Apache License
/**
 * Returns a fresh {@code LongWritable} holding the same value as {@code a}, or {@code null} when
 * {@code a} is {@code null}.
 */
public LongWritable evaluate(LongWritable a) {
    return (a == null) ? null : new LongWritable(a.get());
}
From source file:com.cloudera.impala.IncompatibleUdfTest.java
License:Apache License
/**
 * Returns a fresh {@code LongWritable} holding the same value as {@code a}, or {@code null} when
 * {@code a} is {@code null}. The list argument {@code b} is not read.
 */
public LongWritable evaluate(LongWritable a, List<IntWritable> b) {
    if (a != null) {
        final long copiedValue = a.get();
        return new LongWritable(copiedValue);
    }
    return null;
}
From source file:com.cloudera.sa.giraph.examples.componentisation.ComponentisationVertex.java
License:Apache License
@Override public void compute(Iterable<LongWritable> messages) throws IOException { boolean updated = false; if (getSuperstep() == 0) { // In Superstep 0, we already know our neighbour's state is equal to their id (by definition), // so we cheat a little by using that knowledge, saving a superstep. long lowestId = getId().get(); for (Edge<LongWritable, NullWritable> edge : getEdges()) { lowestId = Math.min(lowestId, edge.getTargetVertexId().get()); }//ww w. ja va2 s .c o m if (lowestId < getId().get()) { getValue().set(lowestId); updated = true; } } else { // In all other supersteps we have to process messages to see if we should be updated long lowestValue = getValue().get(); for (LongWritable message : messages) { lowestValue = Math.min(lowestValue, message.get()); } if (lowestValue < getValue().get()) { getValue().set(lowestValue); updated = true; } } if (updated) { sendMessageToAllEdges(getValue()); } voteToHalt(); }
From source file:com.cloudera.sa.giraph.examples.kmeans.KMeansVertex.java
License:Apache License
@Override public void compute(Iterable<LongWritable> messages) throws IOException { // In the first superstep, we compute the ranges of the dimensions if (getSuperstep() == 0) { aggregate(Constants.MAX, getValue().getPoint()); aggregate(Constants.MIN, getValue().getPoint()); return;/*from w ww . j a va2 s. c om*/ } else { // If there were no cluster reassignments in the previous superstep, we're done. // (Other stopping criteria (not implemented here) could include a fixed number of // iterations, cluster centres that are not moving, or the Residual Sum of Squares // (RSS) is below a certain threshold. if (getSuperstep() > 1) { LongWritable updates = getAggregatedValue(Constants.UPDATES); if (updates.get() == 0) { voteToHalt(); return; } } // If we're not stopping, we need to compute the closest cluster to this node int k = getConf().getInt(Constants.K, 3); PointWritable[] means = new PointWritable[k]; int closest = -1; double closestDistance = Double.MAX_VALUE; for (int i = 0; i < k; i++) { means[i] = getAggregatedValue(Constants.POINT_PREFIX + i); double d = distance(getValue().getPoint().getData(), means[i].getData()); if (d < closestDistance) { closestDistance = d; closest = i; } } // If the choice of cluster has changed, aggregate an update so the we recompute // on the next iteration. if (closest != getValue().getCluster()) { aggregate(Constants.UPDATES, one); } // Ensure that the closest cluster position is updated, irrespective of whether or // not the choice of cluster has changed. NodeState state = getValue(); state.setCluster(closest); state.setClusterCentre(means[closest]); setValue(state); // Prepare the next iteration by aggregating this point into the closest cluster. aggregate(Constants.POINT_PREFIX + closest, getValue().getPoint()); } }
From source file:com.cloudera.sa.giraph.examples.ktrusses.MasterCompute.java
License:Apache License
/**
 * Advances the global phase for the next superstep and publishes it through the {@code PHASE}
 * aggregator.
 *
 * <p>Transitions: unset -> KCORE; KCORE repeats until {@code UPDATES} is zero, then DEGREE;
 * DEGREE -> QUERY_FOR_CLOSING_EDGES -> FIND_TRIANGLES -> TRUSSES; TRUSSES goes back to DEGREE
 * unless {@code UPDATES} is zero, in which case COMPONENTISATION_1; COMPONENTISATION_1 ->
 * COMPONENTISATION_2, which repeats until {@code UPDATES} is zero, then OUTPUT. Any other phase is
 * left unchanged.
 */
@Override
public void compute() {
    PhaseWritable phase = getAggregatedValue(Constants.PHASE);
    LongWritable updates = getAggregatedValue(Constants.UPDATES);

    final Phase current = phase.get();
    if (current == null) {
        // No phase recorded yet: start with KCORE.
        phase.set(Phase.KCORE);
    } else {
        // The update counter is read only in the phases that loop until it reaches zero.
        switch (current) {
            case KCORE:
                phase.set(updates.get() == 0 ? Phase.DEGREE : Phase.KCORE);
                break;
            case DEGREE:
                phase.set(Phase.QUERY_FOR_CLOSING_EDGES);
                break;
            case QUERY_FOR_CLOSING_EDGES:
                phase.set(Phase.FIND_TRIANGLES);
                break;
            case FIND_TRIANGLES:
                phase.set(Phase.TRUSSES);
                break;
            case TRUSSES:
                phase.set(updates.get() == 0 ? Phase.COMPONENTISATION_1 : Phase.DEGREE);
                break;
            case COMPONENTISATION_1:
                phase.set(Phase.COMPONENTISATION_2);
                break;
            case COMPONENTISATION_2:
                phase.set(updates.get() == 0 ? Phase.OUTPUT : Phase.COMPONENTISATION_2);
                break;
            default:
                // Remaining phases have no outgoing transition here.
                break;
        }
    }

    setAggregatedValue(Constants.PHASE, phase);
}
From source file:com.cloudera.sa.giraph.examples.ktrusses.Message.java
License:Apache License
public Message(LongWritable source, IntWritable degree) { this.type = Type.DEGREE_UPDATE; this.source = new LongWritable(source.get()); this.degree = new IntWritable(degree.get()); }
From source file:com.cloudera.sa.giraph.examples.ktrusses.Message.java
License:Apache License
public Message(LongWritable triadA, LongWritable triadB) { this.type = Type.OPEN_TRIAD; this.triadA = new LongWritable(triadA.get()); this.triadB = new LongWritable(triadB.get()); }