Example usage for org.apache.hadoop.io LongWritable get

List of usage examples for org.apache.hadoop.io LongWritable get

Introduction

On this page you can find example usages of org.apache.hadoop.io.LongWritable.get().

Prototype

public long get() 

Source Link

Document

Return the value of this LongWritable.

Usage

From source file:com.caseystella.analytics.outlier.streaming.mad.ConfusionMatrix.java

License:Apache License

/**
 * Builds a confusion matrix comparing the labelled (expected) outlier timestamps with the
 * computed ones.
 *
 * <p>Every {@code ResultType x ResultType} cell starts at zero. Judging by the call sites
 * below, the first {@code ConfusionEntry} argument is the computed label and the second is
 * the expected label. Matching goes through {@code setContains}, which receives
 * {@code meanDiffBetweenTs} and {@code timeBounds}. Score statistics and diagnostics for
 * missed outliers are printed as a side effect.
 *
 * @param expectedOutliers timestamps labelled as outliers
 * @param computedOutliers timestamps flagged as outliers by the detector
 * @param numObservations total number of observations; used to derive the
 *        NON_OUTLIER/NON_OUTLIER cell
 * @param meanDiffBetweenTs mean gap between consecutive timestamps, forwarded to
 *        {@code setContains} and printed in diagnostics
 * @param timeBounds tolerance forwarded to {@code setContains}
 * @param outlierMap observation lookup by timestamp; a timestamp with no entry is tolerated
 *        and simply contributes no score
 * @param globalExpectedOutlierScoreStats accumulator that also receives every expected-outlier
 *        score seen here
 * @return the populated confusion matrix
 */
public static Map<ConfusionEntry, Long> getConfusionMatrix(Set<Long> expectedOutliers,
        Set<Long> computedOutliers, LongWritable numObservations, long meanDiffBetweenTs, int timeBounds,
        Map<Long, Outlier> outlierMap, DescriptiveStatistics globalExpectedOutlierScoreStats) {
    Map<ConfusionEntry, Long> ret = new HashMap<>();
    for (ResultType r : ResultType.values()) {
        for (ResultType s : ResultType.values()) {
            ret.put(new ConfusionEntry(r, s), 0L);
        }
    }
    int unionSize = 0;
    DescriptiveStatistics expectedOutlierScoreStats = new DescriptiveStatistics();
    for (Long expectedOutlier : expectedOutliers) {
        Outlier o = outlierMap.get(expectedOutlier);
        // A labelled timestamp may have no recorded observation; guard against the null lookup.
        if (o != null && o.getScore() != null) {
            expectedOutlierScoreStats.addValue(o.getScore());
            globalExpectedOutlierScoreStats.addValue(o.getScore());
        }
        ConfusionEntry entry;
        if (setContains(computedOutliers, expectedOutlier, meanDiffBetweenTs, timeBounds)) {
            entry = new ConfusionEntry(ResultType.OUTLIER, ResultType.OUTLIER);
        } else {
            entry = new ConfusionEntry(ResultType.NON_OUTLIER, ResultType.OUTLIER);
            long closest = closest(expectedOutlier, computedOutliers);
            if (closest != Long.MAX_VALUE) {
                // Only compute the distance once we know closest is a real timestamp and
                // not the Long.MAX_VALUE sentinel.
                long delta = Math.abs(expectedOutlier - closest);
                System.out.println("Missed an outlier (" + expectedOutlier + ") wasn't in computed outliers ("
                        + o + "), closest point is " + closest + " which is " + timeConversion(delta)
                        + "away. - E[delta t] " + timeConversion(meanDiffBetweenTs) + "");
            } else {
                System.out.println("Missed an outlier (" + expectedOutlier + ") wasn't in computed outliers ("
                        + o + "), which is empty. - E[delta t] " + timeConversion(meanDiffBetweenTs) + "");
            }
        }
        // Matched or missed, each expected outlier counts once towards the union.
        ConfusionEntry.increment(entry, ret);
        unionSize++;
    }
    printStats("Expected Outlier Score Stats", expectedOutlierScoreStats);
    DescriptiveStatistics computedOutlierScoreStats = new DescriptiveStatistics();
    for (Long computedOutlier : computedOutliers) {
        if (!setContains(expectedOutliers, computedOutlier, meanDiffBetweenTs, timeBounds)) {
            Outlier o = outlierMap.get(computedOutlier);
            if (o != null && o.getScore() != null) {
                computedOutlierScoreStats.addValue(o.getScore());
            }
            ConfusionEntry entry = new ConfusionEntry(ResultType.OUTLIER, ResultType.NON_OUTLIER);
            ConfusionEntry.increment(entry, ret);
            unionSize++;
        }
    }
    printStats("Computed Outlier Scores", computedOutlierScoreStats);
    // Everything not counted above is recorded as NON_OUTLIER/NON_OUTLIER.
    ret.put(new ConfusionEntry(ResultType.NON_OUTLIER, ResultType.NON_OUTLIER),
            numObservations.get() - unionSize);
    Assert.assertEquals(numObservations.get(), getTotalNum(ret));
    return ret;
}

From source file:com.caseystella.analytics.outlier.streaming.mad.SketchyMovingMADIntegrationTest.java

License:Apache License

/**
 * Runs the outlier detector over every benchmark file listed in {@code combined_labels.json}
 * and prints a confusion matrix per file plus an overall one.
 *
 * <p>For each data file the per-point callback writes a plot line to
 * {@code <name>.dat}, records the observation by timestamp, counts observations and
 * accumulates the gaps between consecutive timestamps.
 *
 * @throws IOException if the label file or a plot file cannot be opened
 */
@Test
public void runAccuracyBenchmark() throws IOException {
    Map<String, List<String>> benchmarks;
    // Close the label stream deterministically rather than relying on the loader to do so.
    try (FileInputStream labels = new FileInputStream(
            new File(new File(benchmarkRoot), "combined_labels.json"))) {
        benchmarks = JSONUtil.INSTANCE.load(labels, new TypeReference<Map<String, List<String>>>() {
        });
    }
    Assert.assertTrue(benchmarks.size() > 0);
    Map<ConfusionMatrix.ConfusionEntry, Long> overallConfusionMatrix = new HashMap<>();
    DescriptiveStatistics globalExpectedScores = new DescriptiveStatistics();
    long total = 0;
    for (Map.Entry<String, List<String>> kv : benchmarks.entrySet()) {
        File dataFile = new File(new File(benchmarkRoot), kv.getKey());
        File plotFile = new File(new File(benchmarkRoot), kv.getKey() + ".dat");
        Assert.assertTrue(dataFile.exists());
        Set<Long> expectedOutliers = Sets.newHashSet(Iterables.transform(kv.getValue(), STR_TO_TS));
        OutlierRunner runner = new OutlierRunner(outlierConfig, extractorConfigStr);
        final LongWritable numObservations = new LongWritable(0);
        // Long.MIN_VALUE marks "no previous timestamp seen yet".
        final LongWritable lastTimestamp = new LongWritable(Long.MIN_VALUE);
        final DescriptiveStatistics timeDiffStats = new DescriptiveStatistics();
        final Map<Long, Outlier> outlierMap = new HashMap<>();
        final List<Outlier> outliers;
        // try-with-resources so the plot file is closed even when the run throws.
        try (final PrintWriter pw = new PrintWriter(plotFile)) {
            outliers = runner.run(dataFile, 1, EnumSet.of(Severity.SEVERE_OUTLIER),
                    new Function<Map.Entry<DataPoint, Outlier>, Void>() {
                        @Nullable
                        @Override
                        public Void apply(@Nullable Map.Entry<DataPoint, Outlier> observed) {
                            DataPoint dataPoint = observed.getKey();
                            Outlier outlier = observed.getValue();
                            pw.println(dataPoint.getTimestamp() + " " + outlier.getDataPoint().getValue() + " "
                                    + ((outlier.getSeverity() == Severity.SEVERE_OUTLIER) ? "outlier" : "normal"));
                            outlierMap.put(dataPoint.getTimestamp(), outlier);
                            numObservations.set(numObservations.get() + 1);
                            if (lastTimestamp.get() != Long.MIN_VALUE) {
                                timeDiffStats.addValue(dataPoint.getTimestamp() - lastTimestamp.get());
                            }
                            lastTimestamp.set(dataPoint.getTimestamp());
                            return null;
                        }
                    });
        }
        total += numObservations.get();
        Set<Long> calculatedOutliers = Sets
                .newHashSet(Iterables.transform(outliers, OutlierRunner.OUTLIER_TO_TS));
        double stdDevDiff = Math.sqrt(timeDiffStats.getVariance());
        System.out.println("Running data from " + kv.getKey() + " - E[time delta]: "
                + ConfusionMatrix.timeConversion((long) timeDiffStats.getMean()) + ", StdDev[time delta]: "
                + ConfusionMatrix.timeConversion((long) stdDevDiff) + " mean: " + runner.getMean());
        // Time bound is fixed at 3; an earlier variant was: stdDevDiff > 30000 ? 0 : 3
        Map<ConfusionMatrix.ConfusionEntry, Long> confusionMatrix = ConfusionMatrix.getConfusionMatrix(
                expectedOutliers, calculatedOutliers, numObservations, (long) timeDiffStats.getMean(), 3,
                outlierMap, globalExpectedScores);

        ConfusionMatrix.printConfusionMatrix(confusionMatrix);
        overallConfusionMatrix = ConfusionMatrix.merge(overallConfusionMatrix, confusionMatrix);
    }
    System.out.println("Really ran " + total);
    ConfusionMatrix.printConfusionMatrix(overallConfusionMatrix);
    ConfusionMatrix.printStats("Global Expected Outlier Scores", globalExpectedScores);
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelCountingReducer.java

License:Apache License

/**
 * Adds up all counts received for {@code key} and writes the total back out under the
 * same key, reporting progress through the task status.
 */
@Override
protected void reduce(Text key, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
    long total = 0;
    for (LongWritable count : values) {
        // The status is refreshed once per incoming value.
        context.setStatus("Parallel Counting Reducer :" + key);
        total = total + count.get();
    }
    context.setStatus("Parallel Counting Reducer: " + key + " => " + total);
    LongWritable result = new LongWritable(total);
    context.write(key, result);
}

From source file:com.cloudera.impala.hive.executor.TestUdf.java

License:Apache License

/**
 * Returns a new {@code LongWritable} holding the same value as {@code a}, or
 * {@code null} when {@code a} is {@code null}.
 */
public LongWritable evaluate(LongWritable a) {
    return (a == null) ? null : new LongWritable(a.get());
}

From source file:com.cloudera.impala.IncompatibleUdfTest.java

License:Apache License

/**
 * Returns a new {@code LongWritable} holding the same value as {@code a}, or
 * {@code null} when {@code a} is {@code null}. The list argument {@code b} is not read.
 */
public LongWritable evaluate(LongWritable a, List<IntWritable> b) {
    return (a == null) ? null : new LongWritable(a.get());
}

From source file:com.cloudera.sa.giraph.examples.componentisation.ComponentisationVertex.java

License:Apache License

/**
 * Lowers this vertex's value to the smallest id/value it can see, and tells its
 * neighbours whenever the value dropped. The vertex always votes to halt afterwards.
 */
@Override
public void compute(Iterable<LongWritable> messages) throws IOException {
    final boolean firstSuperstep = getSuperstep() == 0;

    // Superstep 0 compares against our own id; later supersteps against our current value.
    final long baseline = firstSuperstep ? getId().get() : getValue().get();
    long candidate = baseline;

    if (firstSuperstep) {
        // A neighbour's initial state equals its id by definition, so read the ids
        // directly off the edges and save a superstep.
        for (Edge<LongWritable, NullWritable> neighbour : getEdges()) {
            candidate = Math.min(candidate, neighbour.getTargetVertexId().get());
        }
    } else {
        // Afterwards the only new information arrives through messages.
        for (LongWritable incoming : messages) {
            candidate = Math.min(candidate, incoming.get());
        }
    }

    if (candidate < baseline) {
        getValue().set(candidate);
        sendMessageToAllEdges(getValue());
    }
    voteToHalt();
}

From source file:com.cloudera.sa.giraph.examples.kmeans.KMeansVertex.java

License:Apache License

/**
 * One k-means step for this vertex: superstep 0 aggregates the point into the MIN/MAX
 * aggregators; later supersteps pick the closest aggregated mean, record it on the vertex
 * state and aggregate the point into that cluster.
 */
@Override
public void compute(Iterable<LongWritable> messages) throws IOException {
    // First superstep: only contribute to the per-dimension ranges.
    if (getSuperstep() == 0) {
        aggregate(Constants.MAX, getValue().getPoint());
        aggregate(Constants.MIN, getValue().getPoint());
        return;
    }

    // Stop once the previous superstep produced no cluster reassignments.
    // (Other stopping criteria, not implemented here, could include a fixed number of
    // iterations, cluster centres that are not moving, or the Residual Sum of Squares
    // (RSS) falling below a certain threshold.)
    if (getSuperstep() > 1) {
        LongWritable reassignments = getAggregatedValue(Constants.UPDATES);
        if (reassignments.get() == 0) {
            voteToHalt();
            return;
        }
    }

    // Otherwise find the cluster whose mean is nearest to this node.
    final int clusterCount = getConf().getInt(Constants.K, 3);
    final PointWritable[] centres = new PointWritable[clusterCount];
    int nearest = -1;
    double nearestDistance = Double.MAX_VALUE;
    for (int idx = 0; idx < clusterCount; idx++) {
        centres[idx] = getAggregatedValue(Constants.POINT_PREFIX + idx);
        double dist = distance(getValue().getPoint().getData(), centres[idx].getData());
        if (dist < nearestDistance) {
            nearestDistance = dist;
            nearest = idx;
        }
    }

    // A changed assignment is aggregated as an update so that another iteration runs.
    if (nearest != getValue().getCluster()) {
        aggregate(Constants.UPDATES, one);
    }

    // The stored centre is refreshed whether or not the assignment changed.
    NodeState current = getValue();
    current.setCluster(nearest);
    current.setClusterCentre(centres[nearest]);
    setValue(current);

    // Feed this point into its cluster's aggregator for the next iteration.
    aggregate(Constants.POINT_PREFIX + nearest, getValue().getPoint());
}

From source file:com.cloudera.sa.giraph.examples.ktrusses.MasterCompute.java

License:Apache License

/**
 * Advances the aggregated phase to its successor and stores it back.
 *
 * <p>KCORE, TRUSSES and COMPONENTISATION_2 consult the UPDATES aggregate: a zero count
 * moves on to the next phase, any other count loops back (KCORE and COMPONENTISATION_2
 * repeat themselves, TRUSSES returns to DEGREE). A phase not listed below is left as is.
 */
@Override
public void compute() {
    PhaseWritable phase = getAggregatedValue(Constants.PHASE);
    LongWritable updates = getAggregatedValue(Constants.UPDATES);

    final Phase current = phase.get();
    if (current == null) {
        // No phase recorded yet: start with KCORE.
        phase.set(Phase.KCORE);
    } else if (current == Phase.KCORE) {
        phase.set(updates.get() == 0 ? Phase.DEGREE : Phase.KCORE);
    } else if (current == Phase.DEGREE) {
        phase.set(Phase.QUERY_FOR_CLOSING_EDGES);
    } else if (current == Phase.QUERY_FOR_CLOSING_EDGES) {
        phase.set(Phase.FIND_TRIANGLES);
    } else if (current == Phase.FIND_TRIANGLES) {
        phase.set(Phase.TRUSSES);
    } else if (current == Phase.TRUSSES) {
        phase.set(updates.get() == 0 ? Phase.COMPONENTISATION_1 : Phase.DEGREE);
    } else if (current == Phase.COMPONENTISATION_1) {
        phase.set(Phase.COMPONENTISATION_2);
    } else if (current == Phase.COMPONENTISATION_2) {
        phase.set(updates.get() == 0 ? Phase.OUTPUT : Phase.COMPONENTISATION_2);
    }
    setAggregatedValue(Constants.PHASE, phase);
}

From source file:com.cloudera.sa.giraph.examples.ktrusses.Message.java

License:Apache License

public Message(LongWritable source, IntWritable degree) {
    this.type = Type.DEGREE_UPDATE;
    this.source = new LongWritable(source.get());
    this.degree = new IntWritable(degree.get());
}

From source file:com.cloudera.sa.giraph.examples.ktrusses.Message.java

License:Apache License

public Message(LongWritable triadA, LongWritable triadB) {
    this.type = Type.OPEN_TRIAD;
    this.triadA = new LongWritable(triadA.get());
    this.triadB = new LongWritable(triadB.get());
}