Example usage for org.apache.hadoop.mapreduce Job setPartitionerClass

Introduction

On this page you can find example usages of the setPartitionerClass method of org.apache.hadoop.mapreduce.Job.

Prototype

public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException

Source Link

Document

Set the Partitioner for the job.

Usage

From source file:org.sifarish.social.RatingPredictor.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Rating predictor  MR";
    job.setJobName(jobName);//  ww  w.  j a  va  2  s .co m

    job.setJarByClass(RatingPredictor.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(RatingPredictor.PredictionMapper.class);
    job.setReducerClass(RatingPredictor.PredictorReducer.class);

    job.setMapOutputKeyClass(TextInt.class);
    job.setMapOutputValueClass(Tuple.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(ItemIdGroupComprator.class);
    job.setPartitionerClass(ItemIdPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());
    int numReducer = job.getConfiguration().getInt("rap.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}

From source file:org.terrier.structures.indexing.NewCompressingMetaIndexBuilder.java

License:Mozilla Public License

public static void reverseAsMapReduceJob(Index index, String structureName, String[] keys, Configuration conf)
        throws Exception {
    long time = System.currentTimeMillis();

    Job job = new Job(conf);

    job.setMapOutputKeyClass(KeyValueTuple.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setMapperClass(MetaIndexMapper.class);
    job.setReducerClass(MetaIndexReducer.class);
    job.setNumReduceTasks(keys.length);//from   w ww  .  j  av a  2 s .  com
    job.setPartitionerClass(KeyedPartitioner.class);
    job.setInputFormatClass(CompressingMetaIndexInputFormat.class);

    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().set("MetaIndexInputStreamRecordReader.structureName", structureName);
    job.getConfiguration().setInt("CompressingMetaIndexBuilder.reverse.keyCount", keys.length);
    job.getConfiguration().set("CompressingMetaIndexBuilder.reverse.keys", ArrayUtils.join(keys, ","));
    job.getConfiguration().set("CompressingMetaIndexBuilder.forward.valueLengths",
            index.getIndexProperty("index." + structureName + ".value-lengths", ""));
    job.getConfiguration().set("CompressingMetaIndexBuilder.forward.keys",
            index.getIndexProperty("index." + structureName + ".key-names", ""));
    FileOutputFormat.setOutputPath(job, new Path(index.getPath(), ".meta"));

    HadoopUtility.toHConfiguration(index, job.getConfiguration());

    //job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(NewCompressingMetaIndexBuilder.class);

    try {
        if (!job.waitForCompletion(true))
            throw new Exception("job failed");
    } catch (Exception e) {
        throw new Exception("Problem running job to reverse metadata", e);
    }

    //only update the index from the controlling process, so that we dont have locking/concurrency issues
    index.setIndexProperty("index." + structureName + ".reverse-key-names", ArrayUtils.join(keys, ","));
    index.flush();

    logger.info("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");
}

From source file:org.visitante.basic.EngagementEventGenerator.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "web log user engaement event generator  MR";
    job.setJobName(jobName);//from   www  .j  a va2 s .c  om

    job.setJarByClass(EngagementEventGenerator.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "visitante");

    job.setMapperClass(SessionExtractor.SessionMapper.class);
    job.setReducerClass(EngagementEventGenerator.SessionReducer.class);

    job.setMapOutputKeyClass(TextLong.class);
    job.setMapOutputValueClass(Tuple.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SessionIdGroupComprator.class);
    job.setPartitionerClass(SessionIdPartitioner.class);

    job.setNumReduceTasks(job.getConfiguration().getInt("ee.num.reducer", 1));
    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}

From source file:org.visitante.basic.SessionExtractor.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "web log session extraction  MR";
    job.setJobName(jobName);/*from   ww w. j a  v  a2 s .  com*/

    job.setJarByClass(SessionExtractor.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "visitante");

    job.setMapperClass(SessionExtractor.SessionMapper.class);
    job.setReducerClass(SessionExtractor.SessionReducer.class);

    job.setMapOutputKeyClass(TextLong.class);
    job.setMapOutputValueClass(Tuple.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SessionIdGroupComprator.class);
    job.setPartitionerClass(SessionIdPartitioner.class);

    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}

From source file:org.visitante.basic.SessionSummarizer.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "web log session summarizer  MR";
    job.setJobName(jobName);/*w w  w. j av  a2 s. c  o  m*/

    job.setJarByClass(SessionSummarizer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "visitante");

    job.setMapperClass(SessionExtractor.SessionMapper.class);
    job.setReducerClass(SessionSummarizer.SessionReducer.class);

    job.setMapOutputKeyClass(TextLong.class);
    job.setMapOutputValueClass(Tuple.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SessionIdGroupComprator.class);
    job.setPartitionerClass(SessionIdPartitioner.class);

    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}

From source file:org.visitante.basic.UserSessionSummary.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "web log user session summary  MR";
    job.setJobName(jobName);/*from www.  j  a  v a2s .  c o  m*/

    job.setJarByClass(UserSessionSummary.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "visitante");

    job.setMapperClass(UserSessionSummary.SessionMapper.class);
    job.setReducerClass(UserSessionSummary.SessionReducer.class);

    job.setMapOutputKeyClass(TextLong.class);
    job.setMapOutputValueClass(Tuple.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SessionExtractor.SessionIdGroupComprator.class);
    job.setPartitionerClass(SessionExtractor.SessionIdPartitioner.class);

    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}

From source file:pad.StarDriver.java

License:Apache License

/**
 * Execute the StarDriver Job./* ww  w.  j a v  a  2 s.co  m*/
 * @param args      array of external arguments, not used in this method
 * @return          <c>1</c> if the StarDriver Job failed its execution; <c>0</c> if everything is ok. 
 * @throws Exception 
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    conf.set("type", this.type.toString());
    Job job = new Job(conf, this.title);
    job.setJarByClass(StarDriver.class);

    job.setMapOutputKeyClass(NodesPairWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(StarMapper.class);
    job.setCombinerClass(StarCombiner.class);
    job.setPartitionerClass(NodePartitioner.class);
    job.setGroupingComparatorClass(NodeGroupingComparator.class);
    job.setReducerClass(StarReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.output);

    if (!job.waitForCompletion(verbose))
        return 1;

    // Set up the private variable looking to the counter value
    this.numChanges = job.getCounters().findCounter(UtilCounters.NUM_CHANGES).getValue();
    return 0;
}

From source file:pad.TerminationDriver.java

License:Apache License

/**
 * Execute the TerminationDriver Job./*  w  ww  .j a  va  2s. c o m*/
 * @param args      array of external arguments, not used in this method
 * @return          <c>1</c> if the TerminationDriver Job failed its execution; <c>0</c> if everything is ok. 
 * @throws Exception 
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    Job job = new Job(conf, "TerminationDriver");
    job.setJarByClass(TerminationDriver.class);

    job.setMapOutputKeyClass(NodesPairWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(ClusterWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(TerminationMapper.class);
    job.setPartitionerClass(NodePartitioner.class);
    job.setGroupingComparatorClass(NodeGroupingComparator.class);
    job.setReducerClass(TerminationReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.output);

    if (!job.waitForCompletion(this.verbose))
        return 1;

    // Set up the private variables looking to the counters value
    this.numNodes = job.getCounters().findCounter(UtilCounters.NUM_NODES).getValue();
    this.numClusters = job.getCounters().findCounter(UtilCounters.NUM_CLUSTERS).getValue();
    return 0;
}

From source file:PartitioningNYSE.NYSEPart.java

public int run(String[] args) throws Exception, ClassNotFoundException {
    Configuration conf = getConf();
    Job job = new Job(conf, "IPcount");
    job.setJarByClass(NYSEPart.class);
    //final File f = new File(NYSEPart.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setMapperClass(IPcountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setCombinerClass(IPcountReducer.class);
    job.setReducerClass(IPcountReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setPartitionerClass(IPcountPartitioner.class);
    job.setNumReduceTasks(7);//w  ww  . j  av  a  2s .c  o m
    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}

From source file:Patterns.A3_Partitioning.Partition_Users_By_Country_Driver.java

/**
 * @param args the command line arguments
 *//*www.  java2 s .  c o m*/
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Users by Country");
    job.setJarByClass(Partition_Users_By_Country_Driver.class);

    job.setMapperClass(Partition_Users_By_Country_Mapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // partitioner class inclusion
    job.setPartitionerClass(Partition_Users_By_Country_Partitioner.class);

    // set multiple formats for custom naming partitioning
    MultipleOutputs.addNamedOutput(job, "countryBins", TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);

    // set num of reduce tasks based on partition we need (here we need 10 cos total no.of countries)
    job.setNumReduceTasks(11);
    job.setReducerClass(Partition_Users_By_Country_Reducer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}