Example usage for org.apache.hadoop.mapreduce.lib.output MultipleOutputs setCountersEnabled

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce.lib.output MultipleOutputs setCountersEnabled.

Prototype

public static void setCountersEnabled(Job job, boolean enabled)

Source Link

Document

Enables or disables counters for the named outputs.

Usage

From source file:pad.InitializationDriver.java

License:Apache License

/**
 * Execute the InitializationDriver Job.
 * //from   w  w w .  jav a2s  .  c  o  m
 * If the input file format is adjacency list, then we can easily determinate the initial number of nodes
 * that is equal to the number of rows of the input file while the number of cliques is zero.
 * In order to obtain a list of arcs from the adjacency list, we use the \see InitializationMapperAdjacent
 * as Mapper and zero Reducer.
 * 
 * If the input file format is cliques list, then we can easily determinate the number of cliques
 * that is equal to the number of rows of the input file.
 * In order to obtain a edges list from the cliques list, we use the \see InitializationMapperClique
 * as Mapper. We store this result into a special folder \see MOS_OUTPUT_NAME.
 * Into the regular folder, this Mapper emits all the encountered nodes.
 * We use \see InitializationReducerNumNodes as Reducer in order to count the initial number of nodes
 * counting all the distinct nodes found. The combiner (\see InitializationCombinerNumNodes) reduce locally
 * the number of duplicated nodes.
 * Obtained the value of the NUM_INITIAL_NODES counter ( \see UtilCounters ), we delete the empty files
 * produced by the Reducer and we move the real results into the main/regular folder.
 * 
 * @param args      array of external arguments, not used in this method
 * @return          <c>1</c> if the InitializationDriver Job failed its execution; <c>0</c> if everything is ok. 
 * @throws Exception 
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    Job job = new Job(conf, "InitializationDriver");
    job.setJarByClass(InitializationDriver.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.output);

    if (this.type == InputType.ADJACENCY_LIST) {
        // In order to obtain the arcs list from the adjacency list, we need only a Mapper task.
        job.setMapperClass(InitializationMapperAdjacency.class);
        job.setNumReduceTasks(0);
    } else {
        // Set up the special folder.
        MultipleOutputs.addNamedOutput(job, MOS_OUTPUT_NAME, SequenceFileOutputFormat.class, IntWritable.class,
                IntWritable.class);
        MultipleOutputs.setCountersEnabled(job, true);
        // In order to obtain the edges list from the cliques list, we need only a Mapper task
        // and we save the result into the special folder.
        // Then, we need a Reducer task in order to count the initial number of nodes
        job.setMapperClass(InitializationMapperClique.class);
        job.setCombinerClass(InitializationCombinerNumNodes.class);
        job.setReducerClass(InitializationReducerNumNodes.class);
    }

    if (!job.waitForCompletion(verbose))
        return 1;

    // Set up the private variables looking to the counters value
    this.numCliques = job.getCounters().findCounter(UtilCounters.NUM_CLIQUES).getValue();
    this.numInitialNodes = job.getCounters().findCounter(UtilCounters.NUM_INITIAL_NODES).getValue();

    if (this.type == InputType.CLIQUES_LIST) {
        FileSystem fs = FileSystem.get(conf);

        // Delete the empty outputs of the Job
        FileStatus[] filesStatus = fs.listStatus(this.output);
        for (FileStatus fileStatus : filesStatus)
            if (fileStatus.getPath().getName().contains("part"))
                fs.delete(fileStatus.getPath(), false);

        // Move the real outputs into the parent folder
        filesStatus = fs.listStatus(this.output.suffix("/" + MOS_OUTPUT_NAME));
        for (FileStatus fileStatus : filesStatus)
            fs.rename(fileStatus.getPath(), this.output.suffix("/" + fileStatus.getPath().getName()));

        // Delete empty special folder
        fs.delete(this.output.suffix("/" + MOS_OUTPUT_NAME), true);
    }

    return 0;
}

From source file:Patterns.A3_Partitioning.Partition_Users_By_Country_Driver.java

/**
 * @param args the command line arguments
 *//*from  ww  w.  j a  v  a 2s  .  c  o m*/
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Users by Country");
    job.setJarByClass(Partition_Users_By_Country_Driver.class);

    job.setMapperClass(Partition_Users_By_Country_Mapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // partitioner class inclusion
    job.setPartitionerClass(Partition_Users_By_Country_Partitioner.class);

    // set multiple formats for custom naming partitioning
    MultipleOutputs.addNamedOutput(job, "countryBins", TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);

    // set num of reduce tasks based on partition we need (here we need 10 cos total no.of countries)
    job.setNumReduceTasks(11);
    job.setReducerClass(Partition_Users_By_Country_Reducer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:seoeun.hadoop.multipleoutputs.TestMRMultipleOutputs.java

License:Apache License

protected void _testMultipleOutputs(boolean withCounters) throws Exception {
    String input = "a\nb\nc\nd\ne\nc\nd\ne";

    //Configuration conf = createJobConf();
    Configuration conf = new Configuration();
    Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);

    job.setJobName("mo");
    MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, SEQUENCE, SequenceFileOutputFormat.class, IntWritable.class,
            Text.class);

    MultipleOutputs.setCountersEnabled(job, withCounters);

    job.setMapperClass(MOMap.class);
    job.setReducerClass(MOReduce.class);

    job.waitForCompletion(true);/*w w w . j  a v  a  2  s  .  c o m*/

    // assert number of named output part files
    int namedOutputCount = 0;
    int valueBasedOutputCount = 0;
    FileSystem fs = OUT_DIR.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(OUT_DIR);
    for (FileStatus status : statuses) {
        String fileName = status.getPath().getName();
        if (fileName.equals("text-m-00000") || fileName.equals("text-m-00001")
                || fileName.equals("text-r-00000") || fileName.equals("sequence_A-m-00000")
                || fileName.equals("sequence_A-m-00001") || fileName.equals("sequence_B-m-00000")
                || fileName.equals("sequence_B-m-00001") || fileName.equals("sequence_B-r-00000")
                || fileName.equals("sequence_C-r-00000")) {
            namedOutputCount++;
        } else if (fileName.equals("a-r-00000") || fileName.equals("b-r-00000") || fileName.equals("c-r-00000")
                || fileName.equals("d-r-00000") || fileName.equals("e-r-00000")) {
            valueBasedOutputCount++;
        }
    }
    //assertEquals(9, namedOutputCount);
    //assertEquals(5, valueBasedOutputCount);

    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(job), "text-r-00000"))));
    int count = 0;
    String line = reader.readLine();
    while (line != null) {
        assertTrue(line.endsWith(TEXT));
        line = reader.readLine();
        count++;
    }
    reader.close();
    assertFalse(count == 0);

    // assert SequenceOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs,
            new Path(FileOutputFormat.getOutputPath(job), "sequence_B-r-00000"), conf);

    assertEquals(IntWritable.class, seqReader.getKeyClass());
    assertEquals(Text.class, seqReader.getValueClass());

    count = 0;
    IntWritable key = new IntWritable();
    Text value = new Text();
    while (seqReader.next(key, value)) {
        assertEquals(SEQUENCE, value.toString());
        count++;
    }
    seqReader.close();
    assertFalse(count == 0);

    if (withCounters) {
        CounterGroup counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
        assertEquals(9, counters.size());
        assertEquals(4, counters.findCounter(TEXT).getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_A").getValue());
        assertEquals(4, counters.findCounter(SEQUENCE + "_B").getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_C").getValue());
        assertEquals(2, counters.findCounter("a").getValue());
        assertEquals(2, counters.findCounter("b").getValue());
        assertEquals(4, counters.findCounter("c").getValue());
        assertEquals(4, counters.findCounter("d").getValue());
        assertEquals(4, counters.findCounter("e").getValue());
    }
}