Example usage for org.apache.hadoop.mapreduce Job setPartitionerClass

List of usage examples for org.apache.hadoop.mapreduce Job setPartitionerClass

Introduction

This page collects usage examples for the setPartitionerClass method of org.apache.hadoop.mapreduce.Job.

Prototype

public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException 

Source Link

Document

Set the Partitioner for the job.

Usage

From source file:org.sifarish.social.RatingPredictor.java

License:Apache License

/**
 * Sets up the "Rating predictor  MR" job, submits it and blocks until it finishes.
 *
 * @param args {@code args[0]} is used as the input path and {@code args[1]} as the output path
 * @return 0 when {@code waitForCompletion} reports true, 1 otherwise
 * @throws Exception propagated from job configuration or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job job = new Job(getConf());
    job.setJobName("Rating predictor  MR");
    job.setJarByClass(RatingPredictor.class);

    // input and output locations
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // map side
    job.setMapperClass(RatingPredictor.PredictionMapper.class);
    job.setMapOutputKeyClass(TextInt.class);
    job.setMapOutputValueClass(Tuple.class);

    // shuffle: custom partitioner together with a custom grouping comparator
    job.setPartitionerClass(ItemIdPartitioner.class);
    job.setGroupingComparatorClass(ItemIdGroupComprator.class);

    // reduce side
    job.setReducerClass(RatingPredictor.PredictorReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    Utility.setConfiguration(job.getConfiguration());

    // "rap.num.reducer" takes precedence; when it is absent (-1) fall back to
    // "num.reducer", which itself defaults to 1
    int reducerCount = job.getConfiguration().getInt("rap.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    }
    job.setNumReduceTasks(reducerCount);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.terrier.structures.indexing.NewCompressingMetaIndexBuilder.java

License:Mozilla Public License

/**
 * Runs a MapReduce job over the named meta index structure and, once the job has
 * completed, records the reversed key names as an index property.
 *
 * @param index         index whose path receives the ".meta" job output and whose
 *                      properties are read before, and updated after, the job
 * @param structureName name of the structure; used to build the index property names
 * @param keys          key names; one reduce task is requested per entry
 * @param conf          configuration the job is created from
 * @throws Exception    wrapping any failure while running the job, including the job
 *                      reporting that it did not complete successfully
 */
public static void reverseAsMapReduceJob(Index index, String structureName, String[] keys, Configuration conf)
        throws Exception {
    final long startMillis = System.currentTimeMillis();
    final String joinedKeys = ArrayUtils.join(keys, ",");
    final String propertyPrefix = "index." + structureName;

    final Job job = new Job(conf);
    job.setJarByClass(NewCompressingMetaIndexBuilder.class);

    // map / reduce wiring
    job.setMapperClass(MetaIndexMapper.class);
    job.setMapOutputKeyClass(KeyValueTuple.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setPartitionerClass(KeyedPartitioner.class);
    job.setReducerClass(MetaIndexReducer.class);
    job.setNumReduceTasks(keys.length);
    job.setInputFormatClass(CompressingMetaIndexInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    // settings handed to the tasks through the job configuration
    final Configuration jobConf = job.getConfiguration();
    jobConf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    jobConf.set("MetaIndexInputStreamRecordReader.structureName", structureName);
    jobConf.setInt("CompressingMetaIndexBuilder.reverse.keyCount", keys.length);
    jobConf.set("CompressingMetaIndexBuilder.reverse.keys", joinedKeys);
    jobConf.set("CompressingMetaIndexBuilder.forward.valueLengths",
            index.getIndexProperty(propertyPrefix + ".value-lengths", ""));
    jobConf.set("CompressingMetaIndexBuilder.forward.keys",
            index.getIndexProperty(propertyPrefix + ".key-names", ""));
    FileOutputFormat.setOutputPath(job, new Path(index.getPath(), ".meta"));

    HadoopUtility.toHConfiguration(index, jobConf);

    try {
        final boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            throw new Exception("job failed");
        }
    } catch (Exception e) {
        throw new Exception("Problem running job to reverse metadata", e);
    }

    // the index is updated here, in the controlling process only, to stay clear of
    // locking/concurrency issues
    index.setIndexProperty(propertyPrefix + ".reverse-key-names", joinedKeys);
    index.flush();

    final long elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000;
    logger.info("Time Taken = " + elapsedSeconds + " seconds");
}

From source file:org.visitante.basic.EngagementEventGenerator.java

License:Apache License

/**
 * Sets up the engagement event generator job, submits it and blocks until it finishes.
 *
 * @param args {@code args[0]} is used as the input path and {@code args[1]} as the output path
 * @return 0 when {@code waitForCompletion} reports true, 1 otherwise
 * @throws Exception propagated from job configuration or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job job = new Job(getConf());
    job.setJobName("web log user engaement event generator  MR");
    job.setJarByClass(EngagementEventGenerator.class);

    // input and output locations
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "visitante");

    // map side (the mapper is the one declared in SessionExtractor)
    job.setMapperClass(SessionExtractor.SessionMapper.class);
    job.setMapOutputKeyClass(TextLong.class);
    job.setMapOutputValueClass(Tuple.class);

    // shuffle: custom partitioner together with a custom grouping comparator
    job.setPartitionerClass(SessionIdPartitioner.class);
    job.setGroupingComparatorClass(SessionIdGroupComprator.class);

    // reduce side
    job.setReducerClass(EngagementEventGenerator.SessionReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // reducer count is read from "ee.num.reducer", defaulting to 1
    final int reducerCount = job.getConfiguration().getInt("ee.num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.visitante.basic.SessionExtractor.java

License:Apache License

/**
 * Sets up the session extraction job, submits it and blocks until it finishes.
 *
 * @param args {@code args[0]} is used as the input path and {@code args[1]} as the output path
 * @return 0 when {@code waitForCompletion} reports true, 1 otherwise
 * @throws Exception propagated from job configuration or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job job = new Job(getConf());
    job.setJobName("web log session extraction  MR");
    job.setJarByClass(SessionExtractor.class);

    // input and output locations
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "visitante");

    // map side
    job.setMapperClass(SessionExtractor.SessionMapper.class);
    job.setMapOutputKeyClass(TextLong.class);
    job.setMapOutputValueClass(Tuple.class);

    // shuffle: custom partitioner together with a custom grouping comparator
    job.setPartitionerClass(SessionIdPartitioner.class);
    job.setGroupingComparatorClass(SessionIdGroupComprator.class);

    // reduce side
    job.setReducerClass(SessionExtractor.SessionReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // reducer count is read from "num.reducer", defaulting to 1
    final int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.visitante.basic.SessionSummarizer.java

License:Apache License

/**
 * Sets up the session summarizer job, submits it and blocks until it finishes.
 *
 * @param args {@code args[0]} is used as the input path and {@code args[1]} as the output path
 * @return 0 when {@code waitForCompletion} reports true, 1 otherwise
 * @throws Exception propagated from job configuration or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job job = new Job(getConf());
    job.setJobName("web log session summarizer  MR");
    job.setJarByClass(SessionSummarizer.class);

    // input and output locations
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "visitante");

    // map side (the mapper is the one declared in SessionExtractor)
    job.setMapperClass(SessionExtractor.SessionMapper.class);
    job.setMapOutputKeyClass(TextLong.class);
    job.setMapOutputValueClass(Tuple.class);

    // shuffle: custom partitioner together with a custom grouping comparator
    job.setPartitionerClass(SessionIdPartitioner.class);
    job.setGroupingComparatorClass(SessionIdGroupComprator.class);

    // reduce side
    job.setReducerClass(SessionSummarizer.SessionReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // reducer count is read from "num.reducer", defaulting to 1
    final int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.visitante.basic.UserSessionSummary.java

License:Apache License

/**
 * Sets up the user session summary job, submits it and blocks until it finishes.
 *
 * @param args {@code args[0]} is used as the input path and {@code args[1]} as the output path
 * @return 0 when {@code waitForCompletion} reports true, 1 otherwise
 * @throws Exception propagated from job configuration or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job job = new Job(getConf());
    job.setJobName("web log user session summary  MR");
    job.setJarByClass(UserSessionSummary.class);

    // input and output locations
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "visitante");

    // map side
    job.setMapperClass(UserSessionSummary.SessionMapper.class);
    job.setMapOutputKeyClass(TextLong.class);
    job.setMapOutputValueClass(Tuple.class);

    // shuffle: partitioner and grouping comparator are the ones nested in SessionExtractor
    job.setPartitionerClass(SessionExtractor.SessionIdPartitioner.class);
    job.setGroupingComparatorClass(SessionExtractor.SessionIdGroupComprator.class);

    // reduce side
    job.setReducerClass(UserSessionSummary.SessionReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // reducer count is read from "num.reducer", defaulting to 1
    final int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:pad.StarDriver.java

License:Apache License

/**
 * Execute the StarDriver Job./* ww  w.  j a v  a  2 s.co  m*/
 * @param args      array of external arguments, not used in this method
 * @return          <c>1</c> if the StarDriver Job failed its execution; <c>0</c> if everything is ok. 
 * @throws Exception 
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    conf.set("type", this.type.toString());
    Job job = new Job(conf, this.title);
    job.setJarByClass(StarDriver.class);

    job.setMapOutputKeyClass(NodesPairWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(StarMapper.class);
    job.setCombinerClass(StarCombiner.class);
    job.setPartitionerClass(NodePartitioner.class);
    job.setGroupingComparatorClass(NodeGroupingComparator.class);
    job.setReducerClass(StarReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.output);

    if (!job.waitForCompletion(verbose))
        return 1;

    // Set up the private variable looking to the counter value
    this.numChanges = job.getCounters().findCounter(UtilCounters.NUM_CHANGES).getValue();
    return 0;
}

From source file:pad.TerminationDriver.java

License:Apache License

/**
 * Execute the TerminationDriver Job./*  w  ww  .j a  va  2s. c o m*/
 * @param args      array of external arguments, not used in this method
 * @return          <c>1</c> if the TerminationDriver Job failed its execution; <c>0</c> if everything is ok. 
 * @throws Exception 
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    Job job = new Job(conf, "TerminationDriver");
    job.setJarByClass(TerminationDriver.class);

    job.setMapOutputKeyClass(NodesPairWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(ClusterWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(TerminationMapper.class);
    job.setPartitionerClass(NodePartitioner.class);
    job.setGroupingComparatorClass(NodeGroupingComparator.class);
    job.setReducerClass(TerminationReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.output);

    if (!job.waitForCompletion(this.verbose))
        return 1;

    // Set up the private variables looking to the counters value
    this.numNodes = job.getCounters().findCounter(UtilCounters.NUM_NODES).getValue();
    this.numClusters = job.getCounters().findCounter(UtilCounters.NUM_CLUSTERS).getValue();
    return 0;
}

From source file:PartitioningNYSE.NYSEPart.java

/**
 * Configures the "IPcount" job, submits it and blocks until it finishes.
 *
 * <p>The completion status is returned instead of being passed to
 * {@code System.exit} here, so the caller decides how the JVM terminates.
 * NOTE(review): a caller that relied on this method ending the process must now
 * forward the returned value to {@code System.exit} itself.
 *
 * @param args {@code args[0]} is used as the input path and {@code args[1]} as the output path
 * @return 0 when {@code waitForCompletion} reports true, 1 otherwise
 * @throws Exception propagated from job configuration or execution
 */
public int run(String[] args) throws Exception, ClassNotFoundException {
    Configuration conf = getConf();
    Job job = new Job(conf, "IPcount");
    job.setJarByClass(NYSEPart.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(IPcountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // NOTE(review): the reducer doubles as the combiner although map output values
    // are Text and job output values are NullWritable; confirm IPcountReducer emits
    // types that are valid as combiner output.
    job.setCombinerClass(IPcountReducer.class);
    job.setReducerClass(IPcountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    job.setPartitionerClass(IPcountPartitioner.class);
    job.setNumReduceTasks(7);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:Patterns.A3_Partitioning.Partition_Users_By_Country_Driver.java

/**
 * Configures the "Users by Country" job, runs it and exits the JVM with status 0
 * when the job completes successfully and 1 otherwise.
 *
 * @param args the command line arguments; {@code args[0]} is used as the input
 *             path and {@code args[1]} as the output path
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    final Configuration conf = new Configuration();
    final Job job = Job.getInstance(conf, "Users by Country");
    job.setJarByClass(Partition_Users_By_Country_Driver.class);

    // map side
    job.setMapperClass(Partition_Users_By_Country_Mapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // custom partitioner
    job.setPartitionerClass(Partition_Users_By_Country_Partitioner.class);

    // named output "countryBins", with counters enabled for MultipleOutputs
    MultipleOutputs.addNamedOutput(job, "countryBins", TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);

    // NOTE(review): 11 reduce tasks are requested; an earlier note here cited 10
    // countries - confirm the count against Partition_Users_By_Country_Partitioner.
    job.setNumReduceTasks(11);
    job.setReducerClass(Partition_Users_By_Country_Reducer.class);

    // input and output locations
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    final boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}