List of usage examples for org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.setCountersEnabled
public static void setCountersEnabled(Job job, boolean enabled)
From source file:pad.InitializationDriver.java
License:Apache License
/** * Execute the InitializationDriver Job. * //from w w w . jav a2s . c o m * If the input file format is adjacency list, then we can easily determinate the initial number of nodes * that is equal to the number of rows of the input file while the number of cliques is zero. * In order to obtain a list of arcs from the adjacency list, we use the \see InitializationMapperAdjacent * as Mapper and zero Reducer. * * If the input file format is cliques list, then we can easily determinate the number of cliques * that is equal to the number of rows of the input file. * In order to obtain a edges list from the cliques list, we use the \see InitializationMapperClique * as Mapper. We store this result into a special folder \see MOS_OUTPUT_NAME. * Into the regular folder, this Mapper emits all the encountered nodes. * We use \see InitializationReducerNumNodes as Reducer in order to count the initial number of nodes * counting all the distinct nodes found. The combiner (\see InitializationCombinerNumNodes) reduce locally * the number of duplicated nodes. * Obtained the value of the NUM_INITIAL_NODES counter ( \see UtilCounters ), we delete the empty files * produced by the Reducer and we move the real results into the main/regular folder. * * @param args array of external arguments, not used in this method * @return <c>1</c> if the InitializationDriver Job failed its execution; <c>0</c> if everything is ok. * @throws Exception */ public int run(String[] args) throws Exception { Configuration conf = new Configuration(); // GenericOptionsParser invocation in order to suppress the hadoop warning. 
new GenericOptionsParser(conf, args); Job job = new Job(conf, "InitializationDriver"); job.setJarByClass(InitializationDriver.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); FileInputFormat.addInputPath(job, this.input); FileOutputFormat.setOutputPath(job, this.output); if (this.type == InputType.ADJACENCY_LIST) { // In order to obtain the arcs list from the adjacency list, we need only a Mapper task. job.setMapperClass(InitializationMapperAdjacency.class); job.setNumReduceTasks(0); } else { // Set up the special folder. MultipleOutputs.addNamedOutput(job, MOS_OUTPUT_NAME, SequenceFileOutputFormat.class, IntWritable.class, IntWritable.class); MultipleOutputs.setCountersEnabled(job, true); // In order to obtain the edges list from the cliques list, we need only a Mapper task // and we save the result into the special folder. // Then, we need a Reducer task in order to count the initial number of nodes job.setMapperClass(InitializationMapperClique.class); job.setCombinerClass(InitializationCombinerNumNodes.class); job.setReducerClass(InitializationReducerNumNodes.class); } if (!job.waitForCompletion(verbose)) return 1; // Set up the private variables looking to the counters value this.numCliques = job.getCounters().findCounter(UtilCounters.NUM_CLIQUES).getValue(); this.numInitialNodes = job.getCounters().findCounter(UtilCounters.NUM_INITIAL_NODES).getValue(); if (this.type == InputType.CLIQUES_LIST) { FileSystem fs = FileSystem.get(conf); // Delete the empty outputs of the Job FileStatus[] filesStatus = fs.listStatus(this.output); for (FileStatus fileStatus : filesStatus) if (fileStatus.getPath().getName().contains("part")) fs.delete(fileStatus.getPath(), false); // Move the real outputs into the parent folder filesStatus = fs.listStatus(this.output.suffix("/" + MOS_OUTPUT_NAME)); for (FileStatus fileStatus : 
filesStatus) fs.rename(fileStatus.getPath(), this.output.suffix("/" + fileStatus.getPath().getName())); // Delete empty special folder fs.delete(this.output.suffix("/" + MOS_OUTPUT_NAME), true); } return 0; }
From source file:Patterns.A3_Partitioning.Partition_Users_By_Country_Driver.java
/** * @param args the command line arguments *//*from ww w. j a v a 2s . c o m*/ public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Users by Country"); job.setJarByClass(Partition_Users_By_Country_Driver.class); job.setMapperClass(Partition_Users_By_Country_Mapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); // partitioner class inclusion job.setPartitionerClass(Partition_Users_By_Country_Partitioner.class); // set multiple formats for custom naming partitioning MultipleOutputs.addNamedOutput(job, "countryBins", TextOutputFormat.class, Text.class, NullWritable.class); MultipleOutputs.setCountersEnabled(job, true); // set num of reduce tasks based on partition we need (here we need 10 cos total no.of countries) job.setNumReduceTasks(11); job.setReducerClass(Partition_Users_By_Country_Reducer.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:seoeun.hadoop.multipleoutputs.TestMRMultipleOutputs.java
License:Apache License
protected void _testMultipleOutputs(boolean withCounters) throws Exception { String input = "a\nb\nc\nd\ne\nc\nd\ne"; //Configuration conf = createJobConf(); Configuration conf = new Configuration(); Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input); job.setJobName("mo"); MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class, LongWritable.class, Text.class); MultipleOutputs.addNamedOutput(job, SEQUENCE, SequenceFileOutputFormat.class, IntWritable.class, Text.class); MultipleOutputs.setCountersEnabled(job, withCounters); job.setMapperClass(MOMap.class); job.setReducerClass(MOReduce.class); job.waitForCompletion(true);/*w w w . j a v a 2 s . c o m*/ // assert number of named output part files int namedOutputCount = 0; int valueBasedOutputCount = 0; FileSystem fs = OUT_DIR.getFileSystem(conf); FileStatus[] statuses = fs.listStatus(OUT_DIR); for (FileStatus status : statuses) { String fileName = status.getPath().getName(); if (fileName.equals("text-m-00000") || fileName.equals("text-m-00001") || fileName.equals("text-r-00000") || fileName.equals("sequence_A-m-00000") || fileName.equals("sequence_A-m-00001") || fileName.equals("sequence_B-m-00000") || fileName.equals("sequence_B-m-00001") || fileName.equals("sequence_B-r-00000") || fileName.equals("sequence_C-r-00000")) { namedOutputCount++; } else if (fileName.equals("a-r-00000") || fileName.equals("b-r-00000") || fileName.equals("c-r-00000") || fileName.equals("d-r-00000") || fileName.equals("e-r-00000")) { valueBasedOutputCount++; } } //assertEquals(9, namedOutputCount); //assertEquals(5, valueBasedOutputCount); // assert TextOutputFormat files correctness BufferedReader reader = new BufferedReader( new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(job), "text-r-00000")))); int count = 0; String line = reader.readLine(); while (line != null) { assertTrue(line.endsWith(TEXT)); line = reader.readLine(); count++; } reader.close(); assertFalse(count == 0); // assert 
SequenceOutputFormat files correctness SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat.getOutputPath(job), "sequence_B-r-00000"), conf); assertEquals(IntWritable.class, seqReader.getKeyClass()); assertEquals(Text.class, seqReader.getValueClass()); count = 0; IntWritable key = new IntWritable(); Text value = new Text(); while (seqReader.next(key, value)) { assertEquals(SEQUENCE, value.toString()); count++; } seqReader.close(); assertFalse(count == 0); if (withCounters) { CounterGroup counters = job.getCounters().getGroup(MultipleOutputs.class.getName()); assertEquals(9, counters.size()); assertEquals(4, counters.findCounter(TEXT).getValue()); assertEquals(2, counters.findCounter(SEQUENCE + "_A").getValue()); assertEquals(4, counters.findCounter(SEQUENCE + "_B").getValue()); assertEquals(2, counters.findCounter(SEQUENCE + "_C").getValue()); assertEquals(2, counters.findCounter("a").getValue()); assertEquals(2, counters.findCounter("b").getValue()); assertEquals(4, counters.findCounter("c").getValue()); assertEquals(4, counters.findCounter("d").getValue()); assertEquals(4, counters.findCounter("e").getValue()); } }