Example usage for org.apache.hadoop.mapreduce Job setPartitionerClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setPartitionerClass.

Prototype

public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException 

Document

Set the Partitioner for the job.
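
For reference, a custom partitioner extends org.apache.hadoop.mapreduce.Partitioner and overrides getPartition, which maps each map-output record to a reduce task index in the range [0, numPartitions). A minimal hash-based sketch (the class name and the Text key/value types are illustrative, not taken from any example below):

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Illustrative partitioner: routes each record by the hash of its key.
public class ExamplePartitioner extends Partitioner<Text, Text> {
    @Override
    public int getPartition(Text key, Text value, int numPartitions) {
        // Mask the sign bit so the index is always in [0, numPartitions).
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}

It is registered with job.setPartitionerClass(ExamplePartitioner.class) and only takes effect when the job runs with more than one reduce task.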

Usage

From source file:ph.fingra.hadoop.mapred.parts.prerole.BasekeysMaker.java

License:Apache License

public Job createJobAppkey(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    String jobName = "prerole/appkey job";
    job.setJobName(jobName);

    job.setJarByClass(BasekeysMaker.class);

    for (int i = 0; i < inputpaths.length; i++) {
        FileInputFormat.addInputPath(job, inputpaths[i]);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(AppkeyMapper.class);
    job.setReducerClass(AppkeyReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(AppkeyPartitioner.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.prerole.BasekeysMaker.java

License:Apache License

public Job createJobComponentkey(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    String jobName = "prerole/componentkey job";
    job.setJobName(jobName);

    job.setJarByClass(BasekeysMaker.class);

    for (int i = 0; i < inputpaths.length; i++) {
        FileInputFormat.addInputPath(job, inputpaths[i]);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(ComponentkeyMapper.class);
    job.setReducerClass(ComponentkeyReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(ComponentkeyPartitioner.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.prerole.ComponentNewuserMerge.java

License:Apache License

public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig, Path dbpath, String dbfilename, TargetDate cutdate) throws IOException {

    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());
    conf.set("dbfilename", dbfilename);
    conf.set("cutyear", cutdate.getYear());
    conf.set("cutmonth", cutdate.getMonth());
    conf.set("cutday", cutdate.getDay());

    Job job = new Job(conf);
    String jobName = "merge/componentnewusermerge job";
    job.setJobName(jobName);

    job.setJarByClass(ComponentNewuserMerge.class);

    for (int i = 0; i < inputpaths.length; i++) {
        FileInputFormat.addInputPath(job, inputpaths[i]);
    }
    if (dbpath != null) {
        FileInputFormat.addInputPath(job, dbpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(ComponentNewuserMapper.class);
    job.setCombinerClass(ComponentNewuserCombiner.class);
    job.setReducerClass(ComponentNewuserReducer.class);

    job.setMapOutputKeyClass(ComponentNewuserKey.class);
    job.setMapOutputValueClass(ComponentNewuserDb.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(ComponentNewuserPartitioner.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.prerole.LogCountStatistic.java

License:Apache License

public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    String jobName = "prerole/logcount job";
    job.setJobName(jobName);

    job.setJarByClass(LogCountStatistic.class);

    for (int i = 0; i < inputpaths.length; i++) {
        FileInputFormat.addInputPath(job, inputpaths[i]);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(LogCountMapper.class);
    job.setCombinerClass(LogCountReducer.class);
    job.setReducerClass(LogCountReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setPartitionerClass(LogCountPartitioner.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.prerole.PreTransform.java

License:Apache License

public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    String jobName = "prerole/pretransform job";
    job.setJobName(jobName);

    job.setJarByClass(PreTransform.class);

    for (int i = 0; i < inputpaths.length; i++) {
        FileInputFormat.addInputPath(job, inputpaths[i]);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(PreTransformMapper.class);
    job.setReducerClass(PreTransformReducer.class);

    job.setMapOutputKeyClass(TransformKey.class);
    job.setMapOutputValueClass(TransformContainer.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(PreTransformPartitioner.class);
    job.setSortComparatorClass(PreTransformSortComparator.class);
    job.setGroupingComparatorClass(PreTransformGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}
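
The PreTransform example pairs setPartitionerClass with setSortComparatorClass and setGroupingComparatorClass, the usual secondary-sort wiring: the partitioner routes every record that shares a natural key to the same reducer, the sort comparator orders the full composite keys, and the grouping comparator collapses them into a single reduce group. A sketch of what such a partitioner typically looks like (the getNaturalKey() accessor and the class name are assumptions, not the project's actual PreTransformPartitioner):

// Secondary-sort partitioner sketch over a composite key. Assumes the
// composite key exposes its natural key via getNaturalKey(); TransformKey's
// real API may differ.
public static class NaturalKeyPartitioner extends Partitioner<TransformKey, TransformContainer> {
    @Override
    public int getPartition(TransformKey key, TransformContainer value, int numPartitions) {
        // Partition on the natural key alone so all composite keys that share
        // it land on the same reducer, regardless of the secondary field.
        return (key.getNaturalKey().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}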

From source file:planattributescleaning.PlanAttributesCleaning.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "attributes cleaning");
    job.setJarByClass(PlanAttributesCleaning.class);

    job.setMapperClass(Events_Mapper1.class);

    job.setReducerClass(Events_Reducer1.class);
    job.setPartitionerClass(GroupPartitioner.class);

    job.setNumReduceTasks(4);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0]));
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);

}

From source file:proj.analysis.uscensus.USCensusFirstJob.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        Configuration conf = new Configuration();
        // Give the MapRed job a name. You'll see this name in the Yarn
        // webapp.
        Job job1 = Job.getInstance(conf, "US-Census-1");
        // Current class.
        job1.setJarByClass(USCensusFirstJob.class);
        // Mapper
        job1.setMapperClass(USCensusFirstMapper.class);
        // Combiner. We use the reducer as the combiner in this case.
        job1.setCombinerClass(USCensusFirstCombiner.class);
        // Reducer
        job1.setReducerClass(USCensusFirstReducer.class);
        job1.setPartitionerClass(USCensusFirstPartitioner.class);
        job1.setNumReduceTasks(USCensusFirstPartitioner.statesName.size());
        // Outputs from the Mapper.
        job1.setMapOutputKeyClass(Text.class);
        job1.setMapOutputValueClass(MapperOPValue.class);
        // Outputs from the Reducer. It is sufficient to set only the following
        // two properties if the Mapper and Reducer have the same key and value
        // types; they are set separately here for clarity.
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        // path to input in HDFS
        FileInputFormat.addInputPath(job1, new Path(args[0]));
        FileSystem fileSystem = FileSystem.get(conf);

        String outputPath = args[1] + "_temp";

        if (fileSystem.exists(new Path(outputPath))) {
            fileSystem.delete(new Path(outputPath), true);
        }
        // path to output in HDFS
        FileOutputFormat.setOutputPath(job1, new Path(outputPath));
        // Block until the job is completed.
        boolean isCompleted = job1.waitForCompletion(true);
        //Second job
        //            Configuration conf2 = new Configuration();
        if (isCompleted) {
            Job job2 = Job.getInstance(conf, "US-Census-2");
            job2.setJarByClass(USCensusFirstJob.class);
            // Mapper
            job2.setMapperClass(USCensusSecondMapper.class);
            // Reducer
            job2.setReducerClass(USCensusSecondReducer.class);
            job2.setNumReduceTasks(1);
            // Outputs from the Mapper.
            job2.setMapOutputKeyClass(Text.class);
            job2.setMapOutputValueClass(Text.class);
            // Outputs from the Reducer. It is sufficient to set only the following
            // two properties if the Mapper and Reducer have the same key and value
            // types; they are set separately here for clarity.
            job2.setOutputKeyClass(Text.class);
            job2.setOutputValueClass(Text.class);
            // path to input in HDFS
            FileInputFormat.addInputPath(job2, new Path(outputPath));
            //            FileSystem fileSystem = FileSystem.get(conf1);

            String outputPath2 = args[1];

            if (fileSystem.exists(new Path(outputPath2))) {
                fileSystem.delete(new Path(outputPath2), true);
            }
            // path to output in HDFS
            FileOutputFormat.setOutputPath(job2, new Path(outputPath2));
            System.exit(job2.waitForCompletion(true) ? 0 : 1);
        }
    } catch (IOException e) {
        System.err.println(e.getMessage());
    } catch (InterruptedException e) {
        System.err.println(e.getMessage());
    } catch (ClassNotFoundException e) {
        System.err.println(e.getMessage());
    }

}

From source file:sa.edu.kaust.twitter.index.IndexTweets.java

License:Apache License

public static void run(String input, String output, int reduceTasks, String hashtag, String url, String startID,
        String endID, boolean expandHashtag, boolean expandURL) throws Exception {

    Path inputPath = new Path(input);
    Path outputPath = new Path(output);

    sLogger.info("input dir: " + inputPath);
    sLogger.info("output dir: " + outputPath);
    sLogger.info("num of output files: " + reduceTasks);

    Configuration conf = new Configuration();
    conf.set("startID", startID);
    conf.set("endID", endID);
    conf.setBoolean("expandHashtag", expandHashtag);
    conf.setBoolean("expandURL", expandURL);
    FileSystem fs = FileSystem.get(conf);
    Job job = new Job(conf, "IPIndexTweets");
    job.setJarByClass(IndexTweets.class);
    job.setNumReduceTasks(reduceTasks);
    // Null-safe check: mapred.job.tracker may be unset on some configurations.
    if ("local".equals(job.getConfiguration().get("mapred.job.tracker"))) {
        job.getConfiguration().set("HashtagRepresentation", hashtag);
        job.getConfiguration().set("UrlRepresentation", url);
    } else {
        DistributedCache.addCacheFile(new URI(hashtag), job.getConfiguration());
        DistributedCache.addCacheFile(new URI(url), job.getConfiguration());
    }

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    //conf.set("mapred.child.java.opts", "-Xmx2048m");

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(PairOfStringLong.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TweetPostingsList.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // delete the output directory if it exists already
    //FileSystem.get(conf).delete(new Path(output), true);
    if (fs.exists(outputPath)) {
        sLogger.info("Output already exists: skipping!");
        return;
    }

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}

From source file:sampler.TotalOrderPrep.java

License:Open Source License

public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {

    Job sample_job = new Job();

    // Remember the real input format so the sampling input format can use
    // it under the hood

    sample_job.getConfiguration().setBoolean(ARG_INPUTFORMAT, true);
    sample_job.setInputFormatClass(TextInputFormat.class);

    //sample_job.getConfiguration().set("mapred.fairscheduler.pool", "pool9");
    // Base the sample size on the number of reduce tasks that will be used
    // by the real job, but only use 1 reducer for this job (maps output very
    // little).
    sample_job.setNumReduceTasks(1);

    // Make this job's output a temporary file: the input file for the real
    // job's TotalOrderPartitioner
    Path partition = new Path("partitions/");
    //partition.getFileSystem(job.getConfiguration()).deleteOnExit(partition);

    conf = new Configuration();
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(partition)) {
            fs.delete(partition, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(sample_job, partition);
    FileInputFormat.setInputPaths(sample_job, new Path(args[0]));
    //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path(partition, "part-r-00000"));
    //job.setPartitionerClass(TotalOrderPartitioner.class);

    // If there's a combiner, turn it into an identity reducer to prevent
    // destruction of keys.

    sample_job.setCombinerClass(Combiner.class);

    sample_job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setMapOutputValueClass(ImmutableBytesWritable.class);
    sample_job.setOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setOutputValueClass(NullWritable.class);
    sample_job.setPartitionerClass(HashPartitioner.class);
    sample_job.setOutputFormatClass(SequenceFileOutputFormat.class);
    sample_job.setJarByClass(TotalOrderPrep.class);
    sample_job.setMapperClass(Map.class);
    sample_job.setReducerClass(PartitioningReducer.class);
    sample_job.setJobName("(Sampler)");
    sample_job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    sample_job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    return sample_job;

    // Run the job. If it fails, then it's probably because of the main job.
    /*try {
        sample_job.waitForCompletion(false);

        if (!sample_job.isSuccessful())
            throw new RuntimeException("Partition sampler job failed.");

    } catch (Exception e) {
        throw new RuntimeException("Failed to start Partition sampler.", e);
    }*/
}
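
The commented-out lines in this sampler show how its output would feed the real job. A sketch of that wiring, assuming the partition file lands at partitions/part-r-00000 as the commented code suggests:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

// Point the real job's TotalOrderPartitioner at the sampler's partition file.
Job realJob = new Job(new Configuration(), "real-job");
TotalOrderPartitioner.setPartitionFile(realJob.getConfiguration(),
        new Path("partitions", "part-r-00000"));
realJob.setPartitionerClass(TotalOrderPartitioner.class);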

From source file:SecondarySort.HashToAlternateWithSS.java

protected Job jobConfig() throws IOException {
    JobConf conf = new JobConf();
    Job job = new Job(conf, "iteration");
    job.setJarByClass(HashToAlternateWithSS.class);
    job.setReducerClass(ReduceSS.class);
    job.setPartitionerClass(LongPair.HPartitioner.class);
    job.setSortComparatorClass(LongPair.Comparator.class);
    job.setGroupingComparatorClass(LongPair.GroupComparator.class);
    job.setOutputKeyClass(LongPair.class);
    job.setOutputValueClass(Text.class);
    return job;
}