Example usage for org.apache.hadoop.io IntWritable get

Introduction

On this page you can find example usage of org.apache.hadoop.io.IntWritable.get().

Prototype

public int get() 

Document

Return the value of this IntWritable.
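
Before the full MapReduce listings below, here is a minimal, self-contained sketch of how get() pairs with set(); the class name IntWritableGetDemo is illustrative and not taken from any of the source files on this page.

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetDemo {
    public static void main(String[] args) {
        // Wrap a primitive int so Hadoop can serialize it between tasks.
        IntWritable count = new IntWritable(41);

        // get() returns the current primitive value held by the wrapper.
        int current = count.get();

        // set() replaces the value in place; the wrapper object is reused,
        // which is why reducers often keep a single IntWritable instance.
        count.set(current + 1);

        System.out.println("value = " + count.get()); // prints: value = 42
    }
}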

Usage

From source file:edu.umn.cs.sthadoop.trajectory.KNNDTW.java

License:Open Source License

/**
 * A MapReduce version of a KNN query.
 * 
 * @param inputPath the indexed input file to query
 * @param userOutputPath where to write the results, or null to use a temporary path
 * @param params operation parameters, including the query point ("point") and k
 * @return the last submitted job
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "KNN");
    job.setJarByClass(KNNDTW.class);

    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete any results left over from a previous iteration
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect: if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space.
                // Increase the distance by doubling the maximum distance
                // among all partitions that were processed.
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range: a circle centered at the
                // query point with radius equal to the distance to the k-th neighbor.

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {

                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of additional blocks to be processed
            // to check the terminating condition.
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}

From source file:fr.worf.hadoop.stats.reducers.AverageReducer.java

License:Apache License

@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int result = 0;
    int count = 0;
    for (IntWritable val : values) {
        result += val.get();
        ++count;
    }
    result /= count;

    logger.debug("Average : " + result);
    logger.debug("Nombre : " + count);

    Put put = new Put(Bytes.toBytes(key.toString()));
    put.add(CF, COL, Bytes.toBytes(result));

    context.write(null, put);
}

From source file:fr.worf.hadoop.stats.reducers.GroupReducer.java

License:Apache License

@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int result = 0;
    int count = 0;
    int hot = 0;
    int avg = 0;
    int cold = 0;

    for (IntWritable val : values) {
        int index = val.get();
        result += index; // accumulate values so the average computed below reflects the input
        if (index < 15) {
            ++cold;
        } else if (index < 30) {
            ++avg;
        } else {
            ++hot;
        }
        ++count;
    }
    result /= count;

    logger.debug("Average : " + result);
    logger.debug("Nombre : " + count);

    Put put = new Put(Bytes.toBytes(key.toString()));
    put.add(CF, "Hot".getBytes(), Bytes.toBytes(hot));
    put.add(CF, "Average".getBytes(), Bytes.toBytes(avg));
    put.add(CF, "Cold".getBytes(), Bytes.toBytes(cold));

    context.write(null, put);
}

From source file:full_MapReduce.AttributeInfoReducer.java

License:Open Source License

public void reduce(Text key, Iterable<AttributeCounterWritable> values, Context context)
        throws IOException, InterruptedException {
    MapWritable res = new MapWritable();
    Text value;
    Text classification;
    IntWritable count;

    for (AttributeCounterWritable cur_attribute_counter : values) {
        value = cur_attribute_counter.getValue();
        classification = cur_attribute_counter.getClassification();
        count = cur_attribute_counter.getCount();

        if (!res.containsKey(value)) {
            res.put(new Text(value), new MapWritable());
        }
        MapWritable cur_map = (MapWritable) res.get(value);

        if (!cur_map.containsKey(classification)) {
            cur_map.put(new Text(classification), new IntWritable(0));
        }
        ((IntWritable) cur_map.get(classification))
                .set(((IntWritable) cur_map.get(classification)).get() + count.get());
    }

    context.write(key, res);
}

From source file:full_MapReduce.SummarizeReducer.java

License:Open Source License

public void reduce(TextArrayWritable key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {

    int count = 0;
    for (IntWritable value : values) {
        count += value.get();
    }

    context.write(key, new IntWritable(count));
}

From source file:functionaltests.ext.mapreduce.TestMapReduce3Jobs.java

License:Apache License

@org.junit.Test
public void run() throws Exception {

    Path TEST_ROOT_DIR = new Path(System.getProperty("java.io.tmpdir") + File.separator + "TestMapReduce3Jobs");

    fs.delete(TEST_ROOT_DIR, true);

    //
    // Generate distribution of ints. This is the answer key.
    //
    Configuration conf = new Configuration();
    int countsToGo = counts;
    int[] dist = new int[range];
    for (int i = 0; i < range; i++) {
        double avgInts = (1.0 * countsToGo) / (range - i);
        dist[i] = (int) Math.max(0, Math.round(avgInts + (Math.sqrt(avgInts) * r.nextGaussian())));
        countsToGo -= dist[i];
    }
    if (countsToGo > 0) {
        dist[dist.length - 1] += countsToGo;
    }

    //
    // Write the answer key to a file.
    //
    if (!fs.mkdirs(TEST_ROOT_DIR)) {
        throw new IOException("Mkdirs failed to create " + TEST_ROOT_DIR.toString());
    }

    Path randomInsRel = new Path("genins");
    Path randomIns = new Path(TEST_ROOT_DIR, randomInsRel);
    if (!fs.mkdirs(randomIns)) {
        throw new IOException("Mkdirs failed to create " + randomIns.toString());
    }

    Path answerkeyRel = new Path("answer.key");
    Path answerkey = new Path(randomIns, answerkeyRel);
    SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, answerkey, IntWritable.class,
            IntWritable.class, SequenceFile.CompressionType.NONE);
    try {
        for (int i = 0; i < range; i++) {
            out.append(new IntWritable(i), new IntWritable(dist[i]));
        }
    } finally {
        out.close();
    }

    printFiles(randomIns, conf);

    //
    // Now we need to generate the random numbers according to
    // the above distribution.
    //
    // We create a lot of map tasks, each of which takes at least
    // one "line" of the distribution. (That is, a certain number
    // X is to be generated Y number of times.)
    //
    // A map task emits Y key/val pairs. The val is X. The key
    // is a randomly-generated number.
    //
    // The reduce task gets its input sorted by key. That is, sorted
    // in random order. It then emits a single line of text for each
    // of the given values. It does not emit the key.
    //
    // Because there's just one reduce task, we emit a single big
    // file of random numbers.
    //
    Path randomOutsRel = new Path("genouts");
    Path randomOuts = new Path(TEST_ROOT_DIR, randomOutsRel);
    fs.delete(randomOuts, true);
    fs.mkdirs(randomOuts);

    Job genJob = new Job(conf, "gen job");
    // FileInputFormat.setInputPaths(genJob, randomIns);
    genJob.setInputFormatClass(SequenceFileInputFormat.class);
    genJob.setMapperClass(RandomGenMapper.class);
    // genJob.setMapperClass(TokenizerMapper.class);

    FileInputFormat.addInputPath(genJob, answerkeyRel);
    FileOutputFormat.setOutputPath(genJob, randomOutsRel);

    // FileOutputFormat.setOutputPath(genJob, randomOuts);
    genJob.setOutputKeyClass(IntWritable.class);
    genJob.setOutputValueClass(IntWritable.class);
    // genJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    genJob.setReducerClass(RandomGenReducer.class);
    genJob.setNumReduceTasks(1);

    PAMapReduceJobConfiguration pamrjc = MapReduceTHelper.getConfiguration();
    pamrjc.setInputSpace("file://" + randomIns);
    pamrjc.setOutputSpace("file://" + TEST_ROOT_DIR);

    MapReduceTHelper.submit(genJob, pamrjc);

    printFiles(randomOuts, conf);

    //
    // Next, we read the big file in and regenerate the
    // original map. It's split into a number of parts.
    // (That number is 'intermediateReduces'.)
    //
    // We have many map tasks, each of which read at least one
    // of the output numbers. For each number read in, the
    // map task emits a key/value pair where the key is the
    // number and the value is "1".
    //
    // We have a single reduce task, which receives its input
    // sorted by the key emitted above. For each key, there will
    // be a certain number of "1" values. The reduce task sums
    // these values to compute how many times the given key was
    // emitted.
    //
    // The reduce task then emits a key/val pair where the key
    // is the number in question, and the value is the number of
    // times the key was emitted. This is the same format as the
    // original answer key (except that numbers emitted zero times
    // will not appear in the regenerated key.) The answer set
    // is split into a number of pieces. A final MapReduce job
    // will merge them.
    //
    // There's not really a need to go to 10 reduces here
    // instead of 1. But we want to test what happens when
    // you have multiple reduces at once.
    //
    int intermediateReduces = 10;
    Path intermediateOutsRel = new Path("intermediateouts");
    Path intermediateOuts = new Path(TEST_ROOT_DIR, intermediateOutsRel);
    fs.delete(intermediateOuts, true);
    conf = new Configuration();
    Job checkJob = new Job(conf, "check job");
    // FileInputFormat.setInputPaths(checkJob, randomOuts);
    FileInputFormat.setInputPaths(checkJob, randomOutsRel);
    checkJob.setMapperClass(RandomCheckMapper.class);
    // checkJob.setInputFormatClass(TextInputFormat.class);

    FileOutputFormat.setOutputPath(checkJob, intermediateOutsRel);
    checkJob.setOutputKeyClass(IntWritable.class);
    checkJob.setOutputValueClass(IntWritable.class);
    checkJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    checkJob.setReducerClass(RandomCheckReducer.class);
    checkJob.setNumReduceTasks(intermediateReduces);

    pamrjc = MapReduceTHelper.getConfiguration();
    pamrjc.setInputSpace("file://" + TEST_ROOT_DIR);
    pamrjc.setOutputSpace("file://" + TEST_ROOT_DIR);

    MapReduceTHelper.submit(checkJob, pamrjc);

    printFiles(intermediateOuts, conf);

    //
    // OK, now we take the output from the last job and
    // merge it down to a single file. The map() and reduce()
    // functions don't really do anything except reemit tuples.
    // But by having a single reduce task here, we end up merging
    // all the files.
    //
    Path finalOutsRel = new Path("finalouts");
    Path finalOuts = new Path(TEST_ROOT_DIR, finalOutsRel);
    fs.delete(finalOuts, true);
    Job mergeJob = new Job(conf, "merge job");
    FileInputFormat.setInputPaths(mergeJob, intermediateOutsRel);
    mergeJob.setInputFormatClass(SequenceFileInputFormat.class);
    mergeJob.setMapperClass(MergeMapper.class);

    FileOutputFormat.setOutputPath(mergeJob, finalOutsRel);
    mergeJob.setOutputKeyClass(IntWritable.class);
    mergeJob.setOutputValueClass(IntWritable.class);
    mergeJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    mergeJob.setReducerClass(MergeReducer.class);
    mergeJob.setNumReduceTasks(1);

    pamrjc = MapReduceTHelper.getConfiguration();
    pamrjc.setInputSpace("file://" + TEST_ROOT_DIR);
    pamrjc.setOutputSpace("file://" + TEST_ROOT_DIR);

    MapReduceTHelper.submit(mergeJob, pamrjc);

    printFiles(finalOuts, conf);

    //
    // Finally, we compare the reconstructed answer key with the
    // original one. Remember, we need to ignore zero-count items
    // in the original key.
    //
    boolean success = true;
    try {
        File dir = new File(finalOuts.toString());
        System.out.println(finalOuts.toString());
        System.out.println(dir);
        String filename = dir.list()[0];
        Path recomputedkey = new Path(finalOuts, filename);
        System.out.println("++++++++++++++++ Path to recomputed key: " + recomputedkey);
        SequenceFile.Reader in = new SequenceFile.Reader(fs, recomputedkey, conf);
        int totalseen = 0;
        try {
            IntWritable key = new IntWritable();
            IntWritable val = new IntWritable();
            for (int i = 0; i < range; i++) {
                if (dist[i] == 0) {
                    continue;
                }
                if (!in.next(key, val)) {
                    System.err.println("Cannot read entry " + i);
                    success = false;
                    break;
                } else {
                    if (!((key.get() == i) && (val.get() == dist[i]))) {
                        System.err.println("Mismatch!  Pos=" + key.get() + ", i=" + i + ", val=" + val.get()
                                + ", dist[i]=" + dist[i]);
                        success = false;
                    }
                    totalseen += val.get();
                }
            }
            if (success) {
                if (in.next(key, val)) {
                    System.err.println("Unnecessary lines in recomputed key!");
                    success = false;
                }
            }
        } finally {
            in.close();
        }
        int originalTotal = 0;
        for (int i = 0; i < dist.length; i++) {
            originalTotal += dist[i];
        }
        System.out.println("Original sum: " + originalTotal);
        System.out.println("Recomputed sum: " + totalseen);

        //
        // Write to "results" whether the test succeeded or not.
        //
        Path resultFile = new Path(TEST_ROOT_DIR, "results");
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(resultFile)));
        try {
            bw.write("Success=" + success + "\n");
            System.out.println("Success=" + success);
        } finally {
            bw.close();
        }
        Assert.assertTrue("Test failed", success);
        fs.delete(TEST_ROOT_DIR, true);
    } catch (Throwable e) {
        Assert.assertTrue("Unexpected exception; test failed", false);
        e.printStackTrace();
    }
}

From source file:gov.llnl.ontology.mapreduce.stats.WordCountSumReducer.java

License:Open Source License

/**
 * {@inheritDoc}
 */
public void reduce(StringPair key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int totalCount = 0;
    for (IntWritable value : values)
        totalCount += value.get();
    context.write(key, new IntWritable(totalCount));
}

From source file:graph.Standard.java

License:Apache License

@Override
public void compute(Iterable<IntWritable> messages) throws IOException {
    int turn = this.getConf().getInt("nr.turns", 1);

    if (getSuperstep() >= 1) {
        int sum = 0;

        for (IntWritable i : messages) {
            sum += i.get();
        }

        setValue(new IntWritable(sum));
    }

    if (getSuperstep() < turn) {
        long edges = getNumEdges();
        if (edges > 0) {
            sendMessageToAllEdges(getId());
        }
    } else {
        voteToHalt();
    }
}

From source file:hadoop.mongo.treasury.TreasuryYieldReducer.java

License:Apache License

@Override
public void reduce(final IntWritable pKey, final Iterable<DoubleWritable> pValues, final Context pContext)
        throws IOException, InterruptedException {

    int count = 0;
    double sum = 0;
    for (final DoubleWritable value : pValues) {
        sum += value.get();
        count++;
    }

    final double avg = sum / count;

    if (LOG.isDebugEnabled()) {
        LOG.debug("Average 10 Year Treasury for " + pKey.get() + " was " + avg);
    }
    System.out.println("Average 10 Year Treasury for " + pKey.get() + " was " + avg);
    BasicBSONObject output = new BasicBSONObject();
    output.put("count", count);
    output.put("avg", avg);
    output.put("sum", sum);
    reduceResult.setDoc(output);
    pContext.write(pKey, reduceResult);
}

From source file:hadoop.mongo.treasury.TreasuryYieldReducer.java

License:Apache License

@Override
public void reduce(final IntWritable key, final Iterator<DoubleWritable> values,
        final OutputCollector<IntWritable, BSONWritable> output, final Reporter reporter) throws IOException {
    int count = 0;
    double sum = 0;
    while (values.hasNext()) {
        sum += values.next().get();
        count++;
    }

    final double avg = sum / count;

    if (LOG.isDebugEnabled()) {
        LOG.debug("Average 10 Year Treasury for " + key.get() + " was " + avg);
    }
    System.out.println("Average 10 Year Treasury for " + key.get() + " was " + avg);

    BasicBSONObject bsonObject = new BasicBSONObject();
    bsonObject.put("count", count);
    bsonObject.put("avg", avg);
    bsonObject.put("sum", sum);
    reduceResult.setDoc(bsonObject);
    output.collect(key, reduceResult);
}