Example usage for org.apache.hadoop.mapreduce Job setPartitionerClass


Introduction

This page shows example usages of org.apache.hadoop.mapreduce.Job.setPartitionerClass.

Prototype

public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException 

Document

Set the Partitioner for the job. Throws IllegalStateException if the job has already been submitted.
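
The class passed here must extend org.apache.hadoop.mapreduce.Partitioner and decide, for each map output record, which reduce task receives it. As a minimal sketch (the class name is illustrative, not taken from the examples below; the logic mirrors the default HashPartitioner):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class ExamplePartitioner extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        // Clear the sign bit so the result is non-negative, then bucket by reducer count.
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}

A job registers it with job.setPartitionerClass(ExamplePartitioner.class); the number of partitions it must map into equals the value passed to job.setNumReduceTasks(...).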

Usage

From source file: com.neu.cs6240.Xml2csvComments.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Setting up the xml tag configurator for splitter
    conf.set("xmlinput.start", "<row ");
    conf.set("xmlinput.end", " />");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Xml2csvPosts <in> <out>");
        System.exit(2);/*w w  w . j  a  v a2s.  c om*/
    }
    Job job = new Job(conf, "Converts Posts.xml to .csv");
    job.setJarByClass(Xml2csvPosts.class);
    job.setInputFormatClass(XmlInputFormat.class);
    job.setMapperClass(CommentsMapper.class);
    job.setReducerClass(CommentsReducer.class);
    job.setPartitionerClass(PostsPartitioner.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    // Set as per your file size
    job.setNumReduceTasks(10);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.neu.cs6240.Xml2csvPosts.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Setting up the xml tag configurator for splitter
    conf.set("xmlinput.start", "<row ");
    conf.set("xmlinput.end", " />");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Xml2csvPosts <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Converts Posts.xml to .csv");
    job.setJarByClass(Xml2csvPosts.class);
    job.setInputFormatClass(XmlInputFormat.class);
    job.setMapperClass(PostsMapper.class);
    job.setReducerClass(PostsReducer.class);
    job.setPartitionerClass(PostsPartitioner.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    // Set as per your file size
    job.setNumReduceTasks(15);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.phantom.hadoop.examples.SecondarySort.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
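
The FirstPartitioner and FirstGroupingComparator referenced above are not shown on this page. In Hadoop's bundled SecondarySort example, the partitioner routes on only the first int of the composite IntPair key, so every record sharing a first element reaches the same reducer; a sketch along those lines (assuming IntPair exposes getFirst()):

public static class FirstPartitioner extends Partitioner<IntPair, IntWritable> {
    @Override
    public int getPartition(IntPair key, IntWritable value, int numPartitions) {
        // Route on the first element only; the grouping comparator then groups
        // reducer input the same way, so values arrive sorted by the second element.
        return Math.abs(key.getFirst() * 127) % numPartitions;
    }
}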

From source file: com.phantom.hadoop.examples.terasort.TeraSort.java

License: Apache License

public int run(String[] args) throws Exception {
    LOG.info("starting");
    Job job = Job.getInstance(getConf());
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    boolean useSimplePartitioner = getUseSimplePartitioner(job);
    TeraInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TeraInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
        URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
        try {
            TeraInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            LOG.error(e.getMessage());
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }

    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    TeraOutputFormat.setFinalSync(job, true);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}

From source file: com.placeiq.piqconnect.Runner.java

License: Apache License

private Job buildJob1(Path input1, Path input2, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.set("mapred.output.compression.type", "BLOCK");

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage1");
    job.setJarByClass(Runner.class);

    job.setMapperClass(IterationStage1._Mapper.class);
    job.setReducerClass(IterationStage1._Reducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(IterationStage1.JoinKey.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setGroupingComparatorClass(IterationStage1.IndexComparator.class);
    job.setPartitionerClass(IterationStage1.IndexPartitioner.class);
    job.setSortComparatorClass(IterationStage1.SortComparator.class);

    FileInputFormat.setInputPaths(job, input1, input2);
    SequenceFileOutputFormat.setOutputPath(job, output);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    setCompression(job);

    return job;
}

From source file: com.rockstor.compact.GenGarbageIndexTool.java

License: Apache License

private Job createSubmittableJob(Configuration conf) throws IOException {
    Job job = new Job(conf, NAME);

    job.setJarByClass(GenGarbageIndexTool.class);
    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(GarbageChunkDB.TAB_NAME, scan, GarbageChunkMapper.class,
            ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

    TableMapReduceUtil.setScannerCaching(job, batchSize);
    job.setReducerClass(GarbageChunkReduce.class);
    job.setPartitionerClass(GarbageChunkPartition.class);
    job.setCombinerClass(GarbageChunkCombine.class);

    job.setNumReduceTasks(Compactor.getInstance().getReduceNum());
    job.setOutputFormatClass(NullOutputFormat.class);

    LOG.info("init job " + NAME + " finished!");
    return job;
}

From source file: com.sanjay.mapreduce.SiCombiner.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(SiCombiner.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setPartitionerClass(WordPartitioner.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(5);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.sirius.hadoop.job.onlinetime.OnlineTimeJob.java

License: Apache License

public Job build() throws Exception {
    //init
    Job job = Job.getInstance(getConf(), "onlinetime");
    job.setJarByClass(OnlineTimeJob.class);

    //map
    job.setMapperClass(StatusMapper.class);
    job.setMapOutputKeyClass(StatusKey.class);
    job.setMapOutputValueClass(OnlineRecord.class);

    //custom partition
    job.setPartitionerClass(StatusKeyPartitioner.class);

    //reduce
    job.setGroupingComparatorClass(StatusKeyGroupComparator.class);
    job.setReducerClass(StatusReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    //input
    FileInputFormat.setInputPaths(job, new Path("/subscriber_status/subscriber_status.json"));

    //output
    FileOutputFormat.setOutputPath(job, out);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, Lz4Codec.class);

    return job;
}

From source file: com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java

License: Apache License

/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The output Splice table name, The format should be Schema.tableName.
 * @param reducer  The reducer class to use.
 * @param job  The current job to adjust.  Make sure the passed job is
 * carrying all necessary configuration.
 * @param partitioner  Partitioner to use. Pass <code>null</code> to use
 * default partitioner.
 * @param quorumAddress Distant cluster to write to; default is null for
 * output to the cluster that is designated in <code>hbase-site.xml</code>.
 * Set this String to the zookeeper ensemble of an alternate remote cluster
 * when you would have the reduce write to a cluster other than the
 * default; e.g. when copying tables between clusters, the source would be
 * designated by <code>hbase-site.xml</code> and this param would have the
 * ensemble address of the remote cluster. The format to pass is particular.
 * Pass <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
 * such as <code>server,server2,server3:2181:/hbase</code>.
 * @param serverClass redefined hbase.regionserver.class
 * @param serverImpl redefined hbase.regionserver.client
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *           job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 * @throws SQLException
 */
public static void initTableReducerJob(String table, Class<? extends Reducer> reducer, Job job,
        Class partitioner, String quorumAddress, String serverClass, String serverImpl,
        boolean addDependencyJars, Class<? extends OutputFormat> outputformatClass) throws IOException {

    Configuration conf = job.getConfiguration();
    job.setOutputFormatClass(outputformatClass);
    if (reducer != null)
        job.setReducerClass(reducer);
    conf.set(MRConstants.SPLICE_OUTPUT_TABLE_NAME, table);
    if (sqlUtil == null)
        sqlUtil = SMSQLUtil.getInstance(conf.get(MRConstants.SPLICE_JDBC_STR));
    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    String hbaseTableID = null;
    try {
        hbaseTableID = sqlUtil.getConglomID(table);
    } catch (SQLException e) {
        throw new IOException(e);
    }
    conf.set(MRConstants.HBASE_OUTPUT_TABLE_NAME, table);

    if (quorumAddress != null) {
        // Calling this will validate the format
        HBasePlatformUtils.validateClusterKey(quorumAddress);
        conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
        conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
        conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);

    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Object.class);
    if (partitioner == HRegionPartitioner.class) {
        job.setPartitionerClass(HRegionPartitioner.class);
        // TODO Where are the keys?
        int regions = getReduceNumberOfRegions(hbaseTableID);
        if (job.getNumReduceTasks() > regions) {
            job.setNumReduceTasks(regions);
        }
    } else if (partitioner != null) {
        job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
        addDependencyJars(job);
    }

    //initCredentials(job);
}

From source file: com.sreejith.loganalyzer.mapreduce.LogDriver.java

License: Apache License

public static void main(String[] args) throws Exception {
    Job job = new Job();
    job.setJarByClass(LogDriver.class);
    job.setJobName("Log Analyzer");

    job.setMapperClass(LogMapper.class);
    job.setPartitionerClass(LogPartitioner.class);
    job.setCombinerClass(LogReducer.class);
    job.setReducerClass(LogReducer.class);

    job.setNumReduceTasks(2);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
}