Example usage of org.apache.hadoop.mapreduce.Job.setPartitionerClass

Introduction

This page collects example usages of the setPartitionerClass method of org.apache.hadoop.mapreduce.Job.

Prototype

public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException

Source Link

Document

Set the Partitioner for the job.

Usage

From source file:ph.fingra.hadoop.mapred.parts.prerole.BasekeysMaker.java

License:Apache License

/**
 * Builds (but does not submit) the "prerole/appkey job" MapReduce job.
 *
 * <p>The debug switches of {@code finconfig} are written into {@code conf} under the
 * keys "verbose" and "counter" before the job is created from it.
 *
 * @param conf       configuration the job is created from; its "verbose" and "counter" keys are overwritten
 * @param inputpaths input paths, each one is added to the job
 * @param outputpath output path of the job
 * @param numreduce  number of reduce tasks
 * @param finconfig  supplies the debug flags
 * @return the configured job
 * @throws IOException if creating the job or registering its paths fails
 */
public Job createJobAppkey(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Expose the debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    job.setJobName("prerole/appkey job");
    job.setJarByClass(BasekeysMaker.class);

    // Input and output locations.
    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    // Processing classes.
    job.setMapperClass(AppkeyMapper.class);
    job.setReducerClass(AppkeyReducer.class);
    job.setPartitionerClass(AppkeyPartitioner.class);

    // Intermediate (map output) types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Final (reduce output) types.
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(numreduce);
    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.prerole.BasekeysMaker.java

License:Apache License

/**
 * Builds (but does not submit) the "prerole/componentkey job" MapReduce job.
 *
 * <p>The debug switches of {@code finconfig} are written into {@code conf} under the
 * keys "verbose" and "counter" before the job is created from it.
 *
 * @param conf       configuration the job is created from; its "verbose" and "counter" keys are overwritten
 * @param inputpaths input paths, each one is added to the job
 * @param outputpath output path of the job
 * @param numreduce  number of reduce tasks
 * @param finconfig  supplies the debug flags
 * @return the configured job
 * @throws IOException if creating the job or registering its paths fails
 */
public Job createJobComponentkey(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Expose the debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    job.setJobName("prerole/componentkey job");
    job.setJarByClass(BasekeysMaker.class);

    // Input and output locations.
    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    // Processing classes.
    job.setMapperClass(ComponentkeyMapper.class);
    job.setReducerClass(ComponentkeyReducer.class);
    job.setPartitionerClass(ComponentkeyPartitioner.class);

    // Intermediate (map output) types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Final (reduce output) types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(numreduce);
    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.prerole.ComponentNewuserMerge.java

License:Apache License

/**
 * Builds (but does not submit) the "merge/componentnewusermerge job" MapReduce job.
 *
 * <p>Before the job is created, the following keys are written into {@code conf}:
 * "verbose" and "counter" (debug flags), "dbfilename", and the cut date split into
 * "cutyear", "cutmonth" and "cutday".
 *
 * @param conf       configuration the job is created from; the keys listed above are overwritten
 * @param inputpaths input paths, each one is added to the job
 * @param outputpath output path of the job
 * @param numreduce  number of reduce tasks
 * @param finconfig  supplies the debug flags
 * @param dbpath     additional input path; skipped when {@code null}
 * @param dbfilename value stored under the "dbfilename" key
 * @param cutdate    date whose year, month and day are stored in the configuration
 * @return the configured job
 * @throws IOException if creating the job or registering its paths fails
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig, Path dbpath, String dbfilename, TargetDate cutdate) throws IOException {

    // Parameters handed to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());
    conf.set("dbfilename", dbfilename);
    conf.set("cutyear", cutdate.getYear());
    conf.set("cutmonth", cutdate.getMonth());
    conf.set("cutday", cutdate.getDay());

    Job job = new Job(conf);
    job.setJobName("merge/componentnewusermerge job");
    job.setJarByClass(ComponentNewuserMerge.class);

    // Regular inputs first, then the optional db path as one more input.
    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    if (null != dbpath) {
        FileInputFormat.addInputPath(job, dbpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    // Processing classes.
    job.setMapperClass(ComponentNewuserMapper.class);
    job.setCombinerClass(ComponentNewuserCombiner.class);
    job.setReducerClass(ComponentNewuserReducer.class);
    job.setPartitionerClass(ComponentNewuserPartitioner.class);

    // Intermediate (map output) types.
    job.setMapOutputKeyClass(ComponentNewuserKey.class);
    job.setMapOutputValueClass(ComponentNewuserDb.class);

    // Final (reduce output) types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(numreduce);
    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.prerole.LogCountStatistic.java

License:Apache License

/**
 * Builds (but does not submit) the "prerole/logcount job" MapReduce job.
 *
 * <p>The debug switches of {@code finconfig} are written into {@code conf} under the
 * keys "verbose" and "counter" before the job is created from it. The same class,
 * {@code LogCountReducer}, is registered as both combiner and reducer.
 *
 * @param conf       configuration the job is created from; its "verbose" and "counter" keys are overwritten
 * @param inputpaths input paths, each one is added to the job
 * @param outputpath output path of the job
 * @param numreduce  number of reduce tasks
 * @param finconfig  supplies the debug flags
 * @return the configured job
 * @throws IOException if creating the job or registering its paths fails
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Expose the debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    job.setJobName("prerole/logcount job");
    job.setJarByClass(LogCountStatistic.class);

    // Input and output locations.
    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    // Processing classes.
    job.setMapperClass(LogCountMapper.class);
    job.setCombinerClass(LogCountReducer.class);
    job.setReducerClass(LogCountReducer.class);
    job.setPartitionerClass(LogCountPartitioner.class);

    // Intermediate (map output) types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    // Final (reduce output) types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setNumReduceTasks(numreduce);
    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.prerole.PreTransform.java

License:Apache License

/**
 * Builds (but does not submit) the "prerole/pretransform job" MapReduce job.
 *
 * <p>The debug switches of {@code finconfig} are written into {@code conf} under the
 * keys "verbose" and "counter" before the job is created from it. Besides the
 * partitioner, a custom sort comparator and grouping comparator are registered.
 *
 * @param conf       configuration the job is created from; its "verbose" and "counter" keys are overwritten
 * @param inputpaths input paths, each one is added to the job
 * @param outputpath output path of the job
 * @param numreduce  number of reduce tasks
 * @param finconfig  supplies the debug flags
 * @return the configured job
 * @throws IOException if creating the job or registering its paths fails
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Expose the debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    job.setJobName("prerole/pretransform job");
    job.setJarByClass(PreTransform.class);

    // Input and output locations.
    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    // Processing classes.
    job.setMapperClass(PreTransformMapper.class);
    job.setReducerClass(PreTransformReducer.class);

    // How map output is partitioned, sorted and grouped for the reducers.
    job.setPartitionerClass(PreTransformPartitioner.class);
    job.setSortComparatorClass(PreTransformSortComparator.class);
    job.setGroupingComparatorClass(PreTransformGroupComparator.class);

    // Intermediate (map output) types.
    job.setMapOutputKeyClass(TransformKey.class);
    job.setMapOutputValueClass(TransformContainer.class);

    // Final (reduce output) types.
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(numreduce);
    return job;
}

From source file:planattributescleaning.PlanAttributesCleaning.java

/**
 * Configures and runs the "attributes cleaning" job, then exits the JVM.
 *
 * <p>Exit status: 0 when the job succeeds, 1 when it fails, 2 when the required
 * arguments are missing.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws IOException            if job setup or execution fails with an I/O error
 * @throws InterruptedException   if the wait for job completion is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        System.err.println("Usage: PlanAttributesCleaning <input path> <output path>");
        System.exit(2);
    }

    Configuration conf = new Configuration();
    Job job = new Job(conf, "attributes cleaning");
    job.setJarByClass(PlanAttributesCleaning.class);

    job.setMapperClass(Events_Mapper1.class);
    job.setReducerClass(Events_Reducer1.class);
    job.setPartitionerClass(GroupPartitioner.class);
    job.setNumReduceTasks(4);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0]));
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:proj.analysis.uscensus.USCensusFirstJob.java

/**
 * Runs the two chained US census jobs and exits the JVM with a status that reflects
 * the outcome.
 *
 * <p>Job 1 ("US-Census-1") reads {@code args[0]} and writes to {@code args[1] + "_temp"}.
 * Job 2 ("US-Census-2") runs only if job 1 succeeds; it reads that temporary directory
 * and writes to {@code args[1]}. An existing output directory is deleted before the
 * corresponding job starts.
 *
 * <p>Exit status is 0 only when both jobs succeed. It is 1 if either job fails or if an
 * {@link IOException}, {@link InterruptedException} or {@link ClassNotFoundException}
 * is raised.
 *
 * @param args the command line arguments: input path, output path
 */
public static void main(String[] args) {
    // Pessimistic default: only a successful second job flips this to 0.
    int exitCode = 1;
    try {
        Configuration conf = new Configuration();
        // Give the MapRed job a name. You'll see this name in the Yarn
        // webapp.
        Job job1 = Job.getInstance(conf, "US-Census-1");
        // Current class.
        job1.setJarByClass(USCensusFirstJob.class);
        // Mapper
        job1.setMapperClass(USCensusFirstMapper.class);
        // Combiner. A dedicated combiner class is used (not the reducer).
        job1.setCombinerClass(USCensusFirstCombiner.class);
        // Reducer
        job1.setReducerClass(USCensusFirstReducer.class);
        job1.setPartitionerClass(USCensusFirstPartitioner.class);
        // One reduce task per entry in the partitioner's statesName collection.
        job1.setNumReduceTasks(USCensusFirstPartitioner.statesName.size());
        // Outputs from the Mapper.
        job1.setMapOutputKeyClass(Text.class);
        job1.setMapOutputValueClass(MapperOPValue.class);
        // Outputs from the Reducer.
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        // path to input in HDFS
        FileInputFormat.addInputPath(job1, new Path(args[0]));
        FileSystem fileSystem = FileSystem.get(conf);

        // Intermediate directory that connects job 1 to job 2.
        String outputPath = args[1] + "_temp";

        if (fileSystem.exists(new Path(outputPath))) {
            fileSystem.delete(new Path(outputPath), true);
        }
        // path to output in HDFS
        FileOutputFormat.setOutputPath(job1, new Path(outputPath));
        // Block until the job is completed; the result says whether it succeeded.
        boolean firstJobSucceeded = job1.waitForCompletion(true);

        // Second job: only meaningful when the first one produced its output.
        if (firstJobSucceeded) {
            Job job2 = Job.getInstance(conf, "US-Census-2");
            job2.setJarByClass(USCensusFirstJob.class);
            // Mapper
            job2.setMapperClass(USCensusSecondMapper.class);
            // Reducer
            job2.setReducerClass(USCensusSecondReducer.class);
            job2.setNumReduceTasks(1);
            // Outputs from the Mapper.
            job2.setMapOutputKeyClass(Text.class);
            job2.setMapOutputValueClass(Text.class);
            // Outputs from the Reducer. Setting only these two would suffice
            // because the Mapper and Reducer share key and value types here;
            // both pairs are set for elaboration.
            job2.setOutputKeyClass(Text.class);
            job2.setOutputValueClass(Text.class);
            // path to input in HDFS: the output of the first job
            FileInputFormat.addInputPath(job2, new Path(outputPath));

            String outputPath2 = args[1];

            if (fileSystem.exists(new Path(outputPath2))) {
                fileSystem.delete(new Path(outputPath2), true);
            }
            // path to output in HDFS
            FileOutputFormat.setOutputPath(job2, new Path(outputPath2));
            exitCode = job2.waitForCompletion(true) ? 0 : 1;
        }
    } catch (IOException e) {
        System.err.println(e.getMessage());
    } catch (InterruptedException e) {
        // Restore the interrupt flag before reporting the failure.
        Thread.currentThread().interrupt();
        System.err.println(e.getMessage());
    } catch (ClassNotFoundException e) {
        System.err.println(e.getMessage());
    }

    // Previously a failed first job or a caught exception fell through and the
    // process ended with status 0; report the failure to the caller instead.
    System.exit(exitCode);
}

From source file:sa.edu.kaust.twitter.index.IndexTweets.java

License:Apache License

/**
 * Configures the "IPIndexTweets" job and runs it, unless the output path already exists.
 *
 * <p>The hashtag and URL representation locations are passed to the job in one of two
 * ways: when the "mapred.job.tracker" property equals "local" they are stored as plain
 * configuration values ("HashtagRepresentation", "UrlRepresentation"); otherwise
 * (including when the property is unset) they are registered as distributed cache files.
 *
 * @param input         input directory (read as sequence files)
 * @param output        output directory (written as sequence files)
 * @param reduceTasks   number of reduce tasks
 * @param hashtag       location of the hashtag representation
 * @param url           location of the URL representation
 * @param startID       value stored under the "startID" configuration key
 * @param endID         value stored under the "endID" configuration key
 * @param expandHashtag value stored under the "expandHashtag" configuration key
 * @param expandURL     value stored under the "expandURL" configuration key
 * @throws Exception if job setup or execution fails
 */
public static void run(String input, String output, int reduceTasks, String hashtag, String url, String startID,
        String endID, boolean expandHashtag, boolean expandURL) throws Exception {

    Path inputPath = new Path(input);
    Path outputPath = new Path(output);

    sLogger.info("input dir: " + inputPath);
    sLogger.info("output dir: " + outputPath);
    sLogger.info("num of output files: " + reduceTasks);

    Configuration conf = new Configuration();
    conf.set("startID", startID);
    conf.set("endID", endID);
    conf.setBoolean("expandHashtag", expandHashtag);
    conf.setBoolean("expandURL", expandURL);
    FileSystem fs = FileSystem.get(conf);
    Job job = new Job(conf, "IPIndexTweets");
    job.setJarByClass(IndexTweets.class);
    job.setNumReduceTasks(reduceTasks);

    // Constant-first comparison: Configuration.get returns null for an unset
    // property, which made the former get(...).equals("local") throw a
    // NullPointerException. An unset property is now treated as "not local".
    if ("local".equals(job.getConfiguration().get("mapred.job.tracker"))) {
        job.getConfiguration().set("HashtagRepresentation", hashtag);
        job.getConfiguration().set("UrlRepresentation", url);
    } else {
        DistributedCache.addCacheFile(new URI(hashtag), job.getConfiguration());
        DistributedCache.addCacheFile(new URI(url), job.getConfiguration());
    }

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(PairOfStringLong.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TweetPostingsList.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Existing output is never deleted: the run is skipped instead.
    if (fs.exists(outputPath)) {
        sLogger.info("Output already exists: skipping!");
        return;
    }

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}

From source file:sampler.TotalOrderPrep.java

License:Open Source License

/**
 * Builds the "(Sampler)" job and returns it without running it.
 *
 * <p>The job reads text input from {@code args[0]}, writes sequence files to the
 * relative path "partitions/" and uses a single reduce task. Any existing "partitions/"
 * directory is removed first; an {@link IOException} during that removal is printed
 * and otherwise ignored. Speculative execution is switched off for both map and
 * reduce tasks.
 *
 * <p>Side effect: the {@code conf} member is replaced with a fresh {@code Configuration}.
 *
 * @param args {@code args[0]} is the input path of the sampling job
 * @return the configured, not yet submitted job
 * @throws IOException            if the job cannot be created or its paths cannot be set
 * @throws ClassNotFoundException declared by the signature; no visible statement raises it directly
 */
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {

    Job sample_job = new Job();
    Configuration jobSettings = sample_job.getConfiguration();

    // NOTE(review): the ARG_INPUTFORMAT flag is presumably read by the sampling
    // input side; confirm against its consumer.
    jobSettings.setBoolean(ARG_INPUTFORMAT, true);
    sample_job.setInputFormatClass(TextInputFormat.class);

    // A single reducer is enough here (the maps output very little).
    sample_job.setNumReduceTasks(1);

    // This job's output directory; per the original notes it is meant to feed the
    // real job's TotalOrderPartitioner.
    Path partition = new Path("partitions/");

    // Clear leftovers of a previous run. A failure is only printed.
    conf = new Configuration();
    try {
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(partition)) {
            fs.delete(partition, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    FileOutputFormat.setOutputPath(sample_job, partition);
    FileInputFormat.setInputPaths(sample_job, new Path(args[0]));

    // Processing classes.
    sample_job.setCombinerClass(Combiner.class);

    // Key/value types of the intermediate and final output.
    sample_job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setMapOutputValueClass(ImmutableBytesWritable.class);
    sample_job.setOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setOutputValueClass(NullWritable.class);

    sample_job.setPartitionerClass(HashPartitioner.class);
    sample_job.setOutputFormatClass(SequenceFileOutputFormat.class);
    sample_job.setJarByClass(TotalOrderPrep.class);
    sample_job.setMapperClass(Map.class);
    sample_job.setReducerClass(PartitioningReducer.class);
    sample_job.setJobName("(Sampler)");

    // No speculative task attempts for this job.
    jobSettings.setBoolean("mapred.map.tasks.speculative.execution", false);
    jobSettings.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    // Running the job is left to the caller.
    return sample_job;
}

From source file:SecondarySort.HashToAlternateWithSS.java

/**
 * Creates the "iteration" job on top of a fresh {@code JobConf}.
 *
 * <p>Registers the reducer, the partitioner, the sort and grouping comparators and
 * the output key/value classes. No mapper or input/output paths are set here.
 *
 * @return the configured job
 * @throws IOException if the job cannot be created
 */
protected Job jobConfig() throws IOException {
    Job job = new Job(new JobConf(), "iteration");
    job.setJarByClass(HashToAlternateWithSS.class);

    job.setReducerClass(ReduceSS.class);

    // Partitioning, ordering and grouping are all defined by LongPair.
    job.setPartitionerClass(LongPair.HPartitioner.class);
    job.setSortComparatorClass(LongPair.Comparator.class);
    job.setGroupingComparatorClass(LongPair.GroupComparator.class);

    // Output types.
    job.setOutputKeyClass(LongPair.class);
    job.setOutputValueClass(Text.class);

    return job;
}