List of usage examples for org.apache.hadoop.io IntWritable get
public int get()
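Before the per-project listings, a minimal, self-contained sketch of what get() returns (the class name IntWritableGetExample and the variable names below are illustrative only and are not taken from any of the listed projects):

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetExample {
    public static void main(String[] args) {
        // IntWritable is Hadoop's mutable, serializable box around a primitive int.
        IntWritable counter = new IntWritable(0);
        counter.set(counter.get() + 5);    // get() returns the current int value
        System.out.println(counter.get()); // prints 5
    }
}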
From source file:edu.umn.cs.sthadoop.trajectory.KNNDTW.java
License:Open Source License
/**
 * A MapReduce version of KNN query.
 *
 * @param fs
 * @param inputPath
 * @param queryPoint
 * @param shape
 * @param output
 * @return
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "KNN");
    job.setJarByClass(KNNDTW.class);

    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);

    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered at the
                // query point and distance to the k^{th} neighbor

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {
                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}
From source file:fr.worf.hadoop.stats.reducers.AverageReducer.java
License:Apache License
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int result = 0;
    int count = 0;
    for (IntWritable val : values) {
        result += val.get();
        ++count;
    }
    result /= count;
    logger.debug("Average : " + result);
    logger.debug("Nombre : " + count);
    Put put = new Put(Bytes.toBytes(key.toString()));
    put.add(CF, COL, Bytes.toBytes(result));
    context.write(null, put);
}
From source file:fr.worf.hadoop.stats.reducers.GroupReducer.java
License:Apache License
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int result = 0;
    int count = 0;
    int hot = 0;
    int avg = 0;
    int cold = 0;
    for (IntWritable val : values) {
        int index = val.get();
        if (index < 15) {
            ++cold;
        } else if (index < 30) {
            ++avg;
        } else {
            ++hot;
        }
        ++count;
    }
    result /= count;
    logger.debug("Average : " + result);
    logger.debug("Nombre : " + count);
    Put put = new Put(Bytes.toBytes(key.toString()));
    put.add(CF, "Hot".getBytes(), Bytes.toBytes(hot));
    put.add(CF, "Average".getBytes(), Bytes.toBytes(avg));
    put.add(CF, "Cold".getBytes(), Bytes.toBytes(cold));
    context.write(null, put);
}
From source file:full_MapReduce.AttributeInfoReducer.java
License:Open Source License
public void reduce(Text key, Iterable<AttributeCounterWritable> values, Context context)
        throws IOException, InterruptedException {
    // res maps each attribute value to a nested map of classification -> occurrence count
    MapWritable res = new MapWritable();
    Text value;
    Text classification;
    IntWritable count;
    for (AttributeCounterWritable cur_attribute_counter : values) {
        value = cur_attribute_counter.getValue();
        classification = cur_attribute_counter.getClassification();
        count = cur_attribute_counter.getCount();

        if (!res.containsKey(value)) {
            res.put(new Text(value), new MapWritable());
        }
        MapWritable cur_map = (MapWritable) res.get(value);

        if (!cur_map.containsKey(classification)) {
            cur_map.put(new Text(classification), new IntWritable(0));
        }
        // Accumulate the count for this (value, classification) pair in place
        ((IntWritable) cur_map.get(classification))
                .set(((IntWritable) cur_map.get(classification)).get() + count.get());
    }

    context.write(key, res);
}
From source file:full_MapReduce.SummarizeReducer.java
License:Open Source License
public void reduce(TextArrayWritable key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int count = 0;
    for (IntWritable value : values) {
        count += value.get();
    }
    context.write(key, new IntWritable(count));
}
From source file:functionaltests.ext.mapreduce.TestMapReduce3Jobs.java
License:Apache License
@org.junit.Test
public void run() throws Exception {
    Path TEST_ROOT_DIR = new Path(
            System.getProperty("java.io.tmpdir") + File.separator + "TestMapReduce3Jobs");
    fs.delete(TEST_ROOT_DIR, true);

    //
    // Generate distribution of ints. This is the answer key.
    //
    Configuration conf = new Configuration();
    int countsToGo = counts;
    int dist[] = new int[range];
    for (int i = 0; i < range; i++) {
        double avgInts = (1.0 * countsToGo) / (range - i);
        dist[i] = (int) Math.max(0, Math.round(avgInts + (Math.sqrt(avgInts) * r.nextGaussian())));
        countsToGo -= dist[i];
    }
    if (countsToGo > 0) {
        dist[dist.length - 1] += countsToGo;
    }

    //
    // Write the answer key to a file.
    //
    if (!fs.mkdirs(TEST_ROOT_DIR)) {
        throw new IOException("Mkdirs failed to create " + TEST_ROOT_DIR.toString());
    }
    Path randomInsRel = new Path("genins");
    Path randomIns = new Path(TEST_ROOT_DIR, randomInsRel);
    if (!fs.mkdirs(randomIns)) {
        throw new IOException("Mkdirs failed to create " + randomIns.toString());
    }

    Path answerkeyRel = new Path("answer.key");
    Path answerkey = new Path(randomIns, answerkeyRel);
    SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, answerkey, IntWritable.class,
            IntWritable.class, SequenceFile.CompressionType.NONE);
    try {
        for (int i = 0; i < range; i++) {
            out.append(new IntWritable(i), new IntWritable(dist[i]));
        }
    } finally {
        out.close();
    }
    printFiles(randomIns, conf);

    //
    // Now we need to generate the random numbers according to
    // the above distribution.
    //
    // We create a lot of map tasks, each of which takes at least
    // one "line" of the distribution. (That is, a certain number
    // X is to be generated Y number of times.)
    //
    // A map task emits Y key/val pairs. The val is X. The key
    // is a randomly-generated number.
    //
    // The reduce task gets its input sorted by key. That is, sorted
    // in random order. It then emits a single line of text
    // for the given values. It does not emit the key.
    //
    // Because there's just one reduce task, we emit a single big
    // file of random numbers.
    //
    Path randomOutsRel = new Path("genouts");
    Path randomOuts = new Path(TEST_ROOT_DIR, randomOutsRel);
    fs.delete(randomOuts, true);
    fs.mkdirs(randomOuts);

    Job genJob = new Job(conf, "gen job");
    // FileInputFormat.setInputPaths(genJob, randomIns);
    genJob.setInputFormatClass(SequenceFileInputFormat.class);
    genJob.setMapperClass(RandomGenMapper.class);
    // genJob.setMapperClass(TokenizerMapper.class);
    FileInputFormat.addInputPath(genJob, answerkeyRel);
    FileOutputFormat.setOutputPath(genJob, randomOutsRel);
    // FileOutputFormat.setOutputPath(genJob, randomOuts);
    genJob.setOutputKeyClass(IntWritable.class);
    genJob.setOutputValueClass(IntWritable.class);
    // genJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    genJob.setReducerClass(RandomGenReducer.class);
    genJob.setNumReduceTasks(1);
    PAMapReduceJobConfiguration pamrjc = MapReduceTHelper.getConfiguration();
    pamrjc.setInputSpace("file://" + randomIns);
    pamrjc.setOutputSpace("file://" + TEST_ROOT_DIR);
    MapReduceTHelper.submit(genJob, pamrjc);
    printFiles(randomOuts, conf);

    //
    // Next, we read the big file in and regenerate the
    // original map. It's split into a number of parts.
    // (That number is 'intermediateReduces'.)
    //
    // We have many map tasks, each of which read at least one
    // of the output numbers. For each number read in, the
    // map task emits a key/value pair where the key is the
    // number and the value is "1".
    //
    // We have a single reduce task, which receives its input
    // sorted by the key emitted above. For each key, there will
    // be a certain number of "1" values. The reduce task sums
    // these values to compute how many times the given key was
    // emitted.
    //
    // The reduce task then emits a key/val pair where the key
    // is the number in question, and the value is the number of
    // times the key was emitted. This is the same format as the
    // original answer key (except that numbers emitted zero times
    // will not appear in the regenerated key.) The answer set
    // is split into a number of pieces. A final MapReduce job
    // will merge them.
    //
    // There's not really a need to go to 10 reduces here
    // instead of 1. But we want to test what happens when
    // you have multiple reduces at once.
    //
    int intermediateReduces = 10;
    Path intermediateOutsRel = new Path("intermediateouts");
    Path intermediateOuts = new Path(TEST_ROOT_DIR, intermediateOutsRel);
    fs.delete(intermediateOuts, true);
    conf = new Configuration();
    Job checkJob = new Job(conf, "check job");
    // FileInputFormat.setInputPaths(checkJob, randomOuts);
    FileInputFormat.setInputPaths(checkJob, randomOutsRel);
    checkJob.setMapperClass(RandomCheckMapper.class);
    // checkJob.setInputFormatClass(TextInputFormat.class);
    FileOutputFormat.setOutputPath(checkJob, intermediateOutsRel);
    checkJob.setOutputKeyClass(IntWritable.class);
    checkJob.setOutputValueClass(IntWritable.class);
    checkJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    checkJob.setReducerClass(RandomCheckReducer.class);
    checkJob.setNumReduceTasks(intermediateReduces);
    pamrjc = MapReduceTHelper.getConfiguration();
    pamrjc.setInputSpace("file://" + TEST_ROOT_DIR);
    pamrjc.setOutputSpace("file://" + TEST_ROOT_DIR);
    MapReduceTHelper.submit(checkJob, pamrjc);
    printFiles(intermediateOuts, conf);

    //
    // OK, now we take the output from the last job and
    // merge it down to a single file. The map() and reduce()
    // functions don't really do anything except reemit tuples.
    // But by having a single reduce task here, we end up merging
    // all the files.
    //
    Path finalOutsRel = new Path("finalouts");
    Path finalOuts = new Path(TEST_ROOT_DIR, finalOutsRel);
    fs.delete(finalOuts, true);
    Job mergeJob = new Job(conf, "merge job");
    FileInputFormat.setInputPaths(mergeJob, intermediateOutsRel);
    mergeJob.setInputFormatClass(SequenceFileInputFormat.class);
    mergeJob.setMapperClass(MergeMapper.class);
    FileOutputFormat.setOutputPath(mergeJob, finalOutsRel);
    mergeJob.setOutputKeyClass(IntWritable.class);
    mergeJob.setOutputValueClass(IntWritable.class);
    mergeJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    mergeJob.setReducerClass(MergeReducer.class);
    mergeJob.setNumReduceTasks(1);
    pamrjc = MapReduceTHelper.getConfiguration();
    pamrjc.setInputSpace("file://" + TEST_ROOT_DIR);
    pamrjc.setOutputSpace("file://" + TEST_ROOT_DIR);
    MapReduceTHelper.submit(mergeJob, pamrjc);
    printFiles(finalOuts, conf);

    //
    // Finally, we compare the reconstructed answer key with the
    // original one. Remember, we need to ignore zero-count items
    // in the original key.
    //
    boolean success = true;
    try {
        File dir = new File(finalOuts.toString());
        System.out.println(finalOuts.toString());
        System.out.println(dir);
        String filename = dir.list()[0];
        Path recomputedkey = new Path(finalOuts, filename);
        System.out.println("++++++++++++++++ Path to recomputed key: " + recomputedkey);
        SequenceFile.Reader in = new SequenceFile.Reader(fs, recomputedkey, conf);
        int totalseen = 0;
        try {
            IntWritable key = new IntWritable();
            IntWritable val = new IntWritable();
            for (int i = 0; i < range; i++) {
                if (dist[i] == 0) {
                    continue;
                }
                if (!in.next(key, val)) {
                    System.err.println("Cannot read entry " + i);
                    success = false;
                    break;
                } else {
                    if (!((key.get() == i) && (val.get() == dist[i]))) {
                        System.err.println("Mismatch! Pos=" + key.get() + ", i=" + i + ", val=" + val.get()
                                + ", dist[i]=" + dist[i]);
                        success = false;
                    }
                    totalseen += val.get();
                }
            }
            if (success) {
                if (in.next(key, val)) {
                    System.err.println("Unnecessary lines in recomputed key!");
                    success = false;
                }
            }
        } finally {
            in.close();
        }
        int originalTotal = 0;
        for (int i = 0; i < dist.length; i++) {
            originalTotal += dist[i];
        }
        System.out.println("Original sum: " + originalTotal);
        System.out.println("Recomputed sum: " + totalseen);

        //
        // Write to "results" whether the test succeeded or not.
        //
        Path resultFile = new Path(TEST_ROOT_DIR, "results");
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(resultFile)));
        try {
            bw.write("Success=" + success + "\n");
            System.out.println("Success=" + success);
        } finally {
            bw.close();
        }
        Assert.assertTrue("Test failed", success);
        fs.delete(TEST_ROOT_DIR, true);
    } catch (Throwable e) {
        Assert.assertTrue("Unexpected exception; test failed", false);
        e.printStackTrace();
    }
}
From source file:gov.llnl.ontology.mapreduce.stats.WordCountSumReducer.java
License:Open Source License
/**
 * {@inheritDoc}
 */
public void reduce(StringPair key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int totalCount = 0;
    for (IntWritable value : values)
        totalCount += value.get();
    context.write(key, new IntWritable(totalCount));
}
From source file:graph.Standard.java
License:Apache License
@Override
public void compute(Iterable<IntWritable> messages) throws IOException {
    int turn = this.getConf().getInt("nr.turns", 1);

    if (getSuperstep() >= 1) {
        // Sum the values received from neighbours and store the total as this vertex's value
        int sum = 0;
        for (IntWritable i : messages) {
            sum += i.get();
        }
        setValue(new IntWritable(sum));
    }

    if (getSuperstep() < turn) {
        // Keep sending this vertex's id to all neighbours until the configured number of turns is reached
        long edges = getNumEdges();
        if (edges > 0) {
            sendMessageToAllEdges(getId());
        }
    } else {
        voteToHalt();
    }
}
From source file:hadoop.mongo.treasury.TreasuryYieldReducer.java
License:Apache License
@Override
public void reduce(final IntWritable pKey, final Iterable<DoubleWritable> pValues, final Context pContext)
        throws IOException, InterruptedException {
    int count = 0;
    double sum = 0;
    for (final DoubleWritable value : pValues) {
        sum += value.get();
        count++;
    }

    final double avg = sum / count;

    if (LOG.isDebugEnabled()) {
        LOG.debug("Average 10 Year Treasury for " + pKey.get() + " was " + avg);
    }
    System.out.println("Average 10 Year Treasury for " + pKey.get() + " was " + avg);

    BasicBSONObject output = new BasicBSONObject();
    output.put("count", count);
    output.put("avg", avg);
    output.put("sum", sum);
    reduceResult.setDoc(output);
    pContext.write(pKey, reduceResult);
}
From source file:hadoop.mongo.treasury.TreasuryYieldReducer.java
License:Apache License
@Override
public void reduce(final IntWritable key, final Iterator<DoubleWritable> values,
        final OutputCollector<IntWritable, BSONWritable> output, final Reporter reporter) throws IOException {
    int count = 0;
    double sum = 0;
    while (values.hasNext()) {
        sum += values.next().get();
        count++;
    }

    final double avg = sum / count;

    if (LOG.isDebugEnabled()) {
        LOG.debug("Average 10 Year Treasury for " + key.get() + " was " + avg);
    }
    System.out.println("Average 10 Year Treasury for " + key.get() + " was " + avg);

    BasicBSONObject bsonObject = new BasicBSONObject();
    bsonObject.put("count", count);
    bsonObject.put("avg", avg);
    bsonObject.put("sum", sum);
    reduceResult.setDoc(bsonObject);
    output.collect(key, reduceResult);
}