Example usage for org.apache.hadoop.mapreduce Job setPartitionerClass

Introduction

On this page you can find example usages of the setPartitionerClass method of org.apache.hadoop.mapreduce.Job.

Prototype

public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException

Source Link

Document

Set the Partitioner for the job.

Usage

From source file:com.jbw.recommendsystem.guiyihua.GYHMRD.java

/**
 * Builds a MapReduce job from the tool's configuration and blocks until it finishes.
 *
 * <p>The input and output locations are read from the configuration keys {@code "in"} and
 * {@code "out"}. The job reads and writes text, emits {@code LongWritable}/{@code Text}
 * pairs from both the map and the reduce side, partitions with {@code XXPartition} and
 * runs with two reduce tasks.
 *
 * @param strings command-line arguments (not used by this method)
 * @return 0 when the job reports success, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path inputPath = new Path(conf.get("in"));
    Path outputPath = new Path(conf.get("out"));

    Job job = Job.getInstance(conf);
    job.setJobName("fdsjh");
    job.setJarByClass(GYHMRD.class);

    // Input side: plain text read from the configured "in" path.
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, inputPath);

    // Intermediate (map output) records and how they are spread over the reducers.
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setPartitionerClass(XXPartition.class);
    job.setNumReduceTasks(2);

    // Output side: plain text written to the configured "out" path.
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    TextOutputFormat.setOutputPath(job, outputPath);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:com.juniarto.secondsorter.SsJob.java

/**
 * Configures and runs the secondary-sort job and logs the time spent waiting for it.
 *
 * <p>Map output is keyed by {@code TextDsi}; partitioning, grouping and sorting of those keys
 * are delegated to {@code NaturalKeyPartitioner}, {@code NaturalKeyGroupingComparator} and
 * {@code CompositeKeyComparator} respectively.
 *
 * @param allArgs command-line arguments; of the arguments left over by
 *                {@code GenericOptionsParser}, the first is used as the input path and the
 *                second as the output path
 * @return 0 if the job completed successfully, 1 if it failed, 2 if fewer than two path
 *         arguments were supplied
 * @throws Exception if the job cannot be configured or run
 */
public int run(String[] allArgs) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "secondary sort");

    job.setJarByClass(SsJob.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(SsMapper.class);
    job.setReducerClass(SsReducer.class);
    job.setNumReduceTasks(2);

    String[] args = new GenericOptionsParser(getConf(), allArgs).getRemainingArgs();
    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException when the
    // input/output paths are missing. Extra trailing arguments are still tolerated.
    if (args.length < 2) {
        System.err.println("Usage: SsJob <in> <out>");
        return 2;
    }
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    long startNanos = System.nanoTime();
    boolean status = job.waitForCompletion(true);
    long timeSpent = System.nanoTime() - startNanos;
    LOG.info("TIME: " + timeSpent);

    // Propagate the job outcome; previously a failed job was reported as success.
    return status ? 0 : 1;
}

From source file:com.kk.hadoop.SecondarySort.java

License:Apache License

/**
 * Command-line entry point: configures the secondary-sort job, runs it and exits the JVM
 * with the job's outcome.
 *
 * <p>Exit status is 0 on success, 1 when the job fails and 2 when the arguments left over
 * by {@code GenericOptionsParser} are not exactly an input and an output path.
 *
 * @param args command-line arguments: optional generic Hadoop options followed by
 *             {@code <in> <out>}
 * @throws Exception if the job cannot be configured or run
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(2);

    // Partitioning is delegated to FirstPartitioner.
    // NOTE(review): no grouping comparator is configured here, so reduce-side grouping uses
    // the job's default for IntPair keys - confirm that this is intended.
    job.setPartitionerClass(FirstPartitioner.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.metamx.druid.indexer.IndexGeneratorJob.java

License:Open Source License

/**
 * Configures, submits and waits for the index-generator job, then records the number of
 * invalid rows it counted in {@code jobStats}.
 *
 * <p>Any JVM system property whose name starts with {@code "hadoop."} is copied into the job
 * configuration with that prefix removed.
 *
 * @return the value returned by {@code waitForCompletion} for the job
 * @throws RuntimeException wrapping any exception raised while configuring or running the job
 */
public boolean run() {
    try {
        Configuration baseConf = new Configuration();
        String jobName = String.format("%s-index-generator-%s", config.getDataSource(),
                config.getIntervals());
        Job job = new Job(baseConf, jobName);

        Configuration jobConf = job.getConfiguration();
        jobConf.set("io.sort.record.percent", "0.23");

        // Applied after the default above, so a "hadoop."-prefixed system property can override it.
        for (String propName : System.getProperties().stringPropertyNames()) {
            if (!propName.startsWith("hadoop.")) {
                continue;
            }
            String hadoopKey = propName.substring("hadoop.".length());
            jobConf.set(hadoopKey, System.getProperty(propName));
        }

        job.setInputFormatClass(TextInputFormat.class);

        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(Text.class);

        SortableBytes.useSortableBytesAsMapOutputKey(job);

        int bucketCount = Iterables.size(config.getAllBuckets());
        job.setNumReduceTasks(bucketCount);
        // Must stay after useSortableBytesAsMapOutputKey, which installs its own partitioner.
        job.setPartitionerClass(IndexGeneratorPartitioner.class);

        job.setReducerClass(IndexGeneratorReducer.class);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);
        config.intoConfiguration(job);

        job.setJarByClass(IndexGeneratorJob.class);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        boolean success = job.waitForCompletion(true);

        // The invalid-row counter is recorded regardless of whether the job succeeded.
        Counter invalidRows = job.getCounters()
                .findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRows.getValue());

        return success;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:com.metamx.druid.indexer.SortableBytes.java

License:Open Source License

/**
 * Configures the given job to use {@code BytesWritable} map output keys together with the
 * SortableBytes partitioner, sort comparator and grouping comparator.
 *
 * @param job the job to configure
 */
public static void useSortableBytesAsMapOutputKey(Job job) {
    // Key type produced by the mappers.
    job.setMapOutputKeyClass(BytesWritable.class);

    // How those keys are routed to reducers, ordered and grouped.
    job.setPartitionerClass(SortableBytesPartitioner.class);
    job.setSortComparatorClass(SortableBytesSortingComparator.class);
    job.setGroupingComparatorClass(SortableBytesGroupingComparator.class);
}

From source file:com.moz.fiji.mapreduce.platform.CDH5FijiMRBridge.java

License:Apache License

/**
 * Sets {@code TotalOrderPartitioner} as the partitioner class of the given job.
 *
 * @param job the job whose partitioner class is set
 */
@Override
public void setTotalOrderPartitionerClass(Job job) {
    job.setPartitionerClass(TotalOrderPartitioner.class);
}

From source file:com.neu.cs6240.AvgTimeToAnswer.AvgTimeToAnsPerHashTag.java

License:Apache License

/**
 * Command-line entry point for the AvgTimeToAnsPerHashTag job.
 *
 * <p>Sets the text output separator to a comma, configures the job, runs it and exits the
 * JVM with status 0 on success or 1 on failure. Exits with status 2 when the arguments left
 * over by {@code GenericOptionsParser} are not exactly an input and an output path.
 *
 * @param args command-line arguments: optional generic Hadoop options followed by
 *             {@code <in> <out>}
 * @throws Exception if the job cannot be configured or run
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ",");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: AvgTimeToAnsPerHashTag <in> <out>");
        System.exit(2);
    }
    Path inputPath = new Path(otherArgs[0]);
    Path outputPath = new Path(otherArgs[1]);

    Job job = new Job(conf, "AvgTimeToAnsPerHashTag");
    job.setJarByClass(AvgTimeToAnsPerHashTag.class);

    // Processing stages.
    // NOTE(review): the reducer class doubles as the combiner; confirm that its logic is
    // safe to apply to partial results.
    job.setMapperClass(AvgTimeToAnsPerHashTagMapper.class);
    job.setCombinerClass(AvgTimeToAnsPerHashTagReducer.class);
    job.setReducerClass(AvgTimeToAnsPerHashTagReducer.class);
    job.setPartitionerClass(AvgTimeToAnsPerHashTagPartitioner.class);

    // Output record types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:com.neu.cs6240.TopKExperts.JoinQA.java

License:Apache License

/**
 * Command-line entry point that runs three MapReduce stages in sequence: JoinQA,
 * UserAnswerCountPerHashTag and TopKPerHashTag.
 *
 * <p>Each stage starts only if the previous one succeeded. The second stage reads the JoinQA
 * output directory and writes to that path with {@code "MR2"} appended; the third stage reads
 * the second stage's output and writes to that path with {@code "MR3"} appended. The JVM
 * exits with status 0 when every stage succeeded and 1 otherwise, or with status 2 when the
 * arguments left over by {@code GenericOptionsParser} are not exactly an input and an output
 * path.
 *
 * @param args command-line arguments: optional generic Hadoop options followed by
 *             {@code <in> <out>}
 * @throws Exception if a job cannot be configured or run
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: JoinQA <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "JoinQA");
    job.setJarByClass(JoinQA.class);
    job.setMapperClass(JoinQAMapper.class);
    job.setReducerClass(JoinQAReducer.class);
    job.setPartitionerClass(JoinQAPartitioner.class);
    job.setGroupingComparatorClass(JoinQAGroupComparator.class);
    job.setOutputKeyClass(JoinQAKey.class);
    job.setOutputValueClass(JoinQAValue.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Stage 1: JoinQA.
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        System.out.println("MR - JoinQA failed.");
    } else {
        // Stage 2: UserAnswerCountPerHashTag, fed by the JoinQA output.
        System.out.println("MR - JoinQA complete. Starting UserAnswerCountPerHashTag...");
        String[] countArgs = new String[] { otherArgs[1], otherArgs[1] + "MR2" };
        succeeded = UserAnswerCountPerHashTag.initUserAnswerCountPerHashTag(countArgs);

        if (!succeeded) {
            System.out.println("MR - UserAnswerCountPerHashTag failed.");
        } else {
            // Stage 3: TopKPerHashTag, fed by the UserAnswerCountPerHashTag output.
            System.out.println("MR - UserAnswerCountPerHashTag complete. Starting TopKPerHashTag...");
            String[] topKArgs = new String[] { countArgs[1], countArgs[1] + "MR3" };
            succeeded = TopKPerHashTag.initTopKPerHashTag(topKArgs);

            if (!succeeded) {
                System.out.println("MR - TopKPerHashTag failed.");
            } else {
                System.out.println("All MR - Successful.");
            }
        }
    }

    System.exit(succeeded ? 0 : 1);
}

From source file:com.neu.cs6240.TopKExperts.TopKPerHashTag.java

License:Apache License

/**
 * Configures and runs the TopKPerHashTag job, blocking until it finishes.
 *
 * <p>Exits the JVM with status 2 when the arguments left over by
 * {@code GenericOptionsParser} are not exactly an input and an output path.
 *
 * @param args optional generic Hadoop options followed by {@code <in> <out>}
 * @return the value returned by {@code waitForCompletion} for the job
 * @throws Exception if the job cannot be configured or run
 */
public static boolean initTopKPerHashTag(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TopKPerHashTag <in> <out>");
        System.exit(2);
    }
    Path inputPath = new Path(otherArgs[0]);
    Path outputPath = new Path(otherArgs[1]);

    Job job = new Job(conf, "TopKPerHashTag");
    job.setJarByClass(TopKPerHashTag.class);

    // Processing stages and key handling.
    job.setMapperClass(TopKPerHashTagMapper.class);
    job.setReducerClass(TopKPerHashTagReducer.class);
    job.setPartitionerClass(TopKPerHashTagPartitioner.class);
    job.setGroupingComparatorClass(TopKPerHashTagGroupComparator.class);

    // Output record types.
    job.setOutputKeyClass(TopKPerHashTagKey.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    boolean completed = job.waitForCompletion(true);
    return completed;
}

From source file:com.neu.cs6240.TopKExperts.UserAnswerCountPerHashTag.java

License:Apache License

/**
 * Configures and runs the UserAnswerCountPerHashTag job, blocking until it finishes.
 *
 * <p>Exits the JVM with status 2 when the arguments left over by
 * {@code GenericOptionsParser} are not exactly an input and an output path.
 *
 * @param args optional generic Hadoop options followed by {@code <in> <out>}
 * @return the value returned by {@code waitForCompletion} for the job
 * @throws Exception if the job cannot be configured or run
 */
public static boolean initUserAnswerCountPerHashTag(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: UserAnswerCountPerHashTag <in> <out>");
        System.exit(2);
    }
    Path inputPath = new Path(otherArgs[0]);
    Path outputPath = new Path(otherArgs[1]);

    Job job = new Job(conf, "UserAnswerCountPerHashTag");
    job.setJarByClass(UserAnswerCountPerHashTag.class);

    // Processing stages and key handling.
    job.setMapperClass(UserAnswerCountPerHashTagMapper.class);
    job.setReducerClass(UserAnswerCountPerHashTagReducer.class);
    job.setPartitionerClass(UserAnswerCountPerHashTagPartitioner.class);
    job.setGroupingComparatorClass(UserAnswerCountPerHashTagGroupComparator.class);

    // Output record types.
    job.setOutputKeyClass(UserAnswerCountPerHashTagKey.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    boolean completed = job.waitForCompletion(true);
    return completed;
}