Example usage for org.apache.hadoop.mapreduce Job setJarByClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setJarByClass.

Prototype

public void setJarByClass(Class<?> cls) 

Document

Set the Jar by finding where a given class came from.
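
Before the full examples below, here is a minimal, self-contained sketch of the typical pattern (the class name SetJarByClassExample, the identity-mapper setup, and the job name are illustrative assumptions, not taken from any of the source files listed below): the driver passes its own class to setJarByClass so that Hadoop can locate the jar containing that class and ship it to the cluster.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetJarByClassExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "setJarByClass-example");

        // setJarByClass() finds the jar that SetJarByClassExample was loaded from
        // and registers it as the job jar to be shipped to the cluster nodes.
        job.setJarByClass(SetJarByClassExample.class);

        // Identity map-only job: the default TextInputFormat produces
        // LongWritable/Text pairs, which the base Mapper passes through unchanged.
        job.setMapperClass(Mapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Each driver in the usage section below follows the same pattern: obtain a Job, call setJarByClass with the driver class, then configure the mapper, reducer, formats, and input/output paths.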

Usage

From source file:com.cloudera.recordservice.examples.mapreduce.MapReduceColorCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    org.apache.log4j.BasicConfigurator.configure();

    if (args.length != 2) {
        System.err.println("Usage: MapReduceColorCount <input path> <output path>");
        return -1;
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(MapReduceColorCount.class);
    job.setJobName("Color Count");

    // RECORDSERVICE:
    // To read from a table instead of a path, comment out
    // FileInputFormat.setInputPaths() and instead use:
    //FileInputFormat.setInputPaths(job, new Path(args[0]));
    RecordServiceConfig.setInputTable(job.getConfiguration(), "rs", "users");

    // RECORDSERVICE:
    // Use the RecordService version of the AvroKeyInputFormat
    job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.class);
    //job.setInputFormatClass(AvroKeyInputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(ColorCountMapper.class);
    AvroJob.setInputKeySchema(job, User.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.cloudera.recordservice.examples.mapreduce.RecordCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: RecordCount <input_query> <output_path>");
        System.exit(1);
    }
    String inputQuery = args[0];
    String output = args[1];

    Job job = Job.getInstance(getConf());
    job.setJobName("recordcount");
    job.setJarByClass(RecordCount.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(LongWritable.class);

    RecordServiceConfig.setInputQuery(job.getConfiguration(), inputQuery);
    job.setInputFormatClass(RecordServiceInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path outputPath = new Path(output);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.cloudera.recordservice.examples.terasort.TeraChecksum.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    boolean useRecordService = false;
    Job job = Job.getInstance(getConf());
    if (args.length != 2 && args.length != 3) {
        usage();
        return 2;
    }
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    }

    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSum");
    job.setJarByClass(TeraChecksum.class);
    job.setMapperClass(ChecksumMapper.class);
    job.setReducerClass(ChecksumReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Unsigned16.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    if (useRecordService) {
        RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]);
        job.setInputFormatClass(RecordServiceTeraInputFormat.class);
    } else {
        TeraInputFormat.setInputPaths(job, new Path(args[0]));
        job.setInputFormatClass(TeraInputFormat.class);
    }
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.cloudera.recordservice.examples.terasort.TeraGen.java

License:Apache License

/**
 * @param args the cli arguments
 */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    if (outputDir.getFileSystem(getConf()).exists(outputDir)) {
        throw new IOException("Output directory " + outputDir + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.cloudera.recordservice.examples.terasort.TeraSort.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    boolean useRecordService = false;
    if (args.length != 2 && args.length != 3) {
        usage();
        return 1;
    }
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    }

    LOG.info("starting");
    Job job = Job.getInstance(getConf());
    boolean useSimplePartitioner = getUseSimplePartitioner(job);

    if (useRecordService) {
        RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]);
        job.setInputFormatClass(RecordServiceTeraInputFormat.class);
        useSimplePartitioner = true;
    } else {
        Path inputDir = new Path(args[0]);
        TeraInputFormat.setInputPaths(job, inputDir);
        job.setInputFormatClass(TeraInputFormat.class);
    }

    Path outputDir = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
        URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
        try {
            TeraInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            LOG.error(e.getMessage());
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }

    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    TeraOutputFormat.setFinalSync(job, true);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}

From source file:com.cloudera.recordservice.examples.terasort.TeraValidate.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    boolean useRecordService = false;
    if (args.length != 2 && args.length != 3) {
        usage();
        return 1;
    }
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    }

    Job job = Job.getInstance(getConf());
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidate.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    // force a single split
    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
    if (useRecordService) {
        RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]);
        job.setInputFormatClass(RecordServiceTeraInputFormat.class);
    } else {
        TeraInputFormat.setInputPaths(job, new Path(args[0]));
        job.setInputFormatClass(TeraInputFormat.class);
    }
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.cloudera.sa.hbasebulkload.HBASEBulkLoadDriver.java

@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    args = new GenericOptionsParser(config, args).getRemainingArgs();

    if (args.length < 6) {
        /*System.out.println("hadoop jar HBASEBulkLoad.jar "
         + "com.cloudera.sa.hbasebulkload.HBASEBulkLoadDriver"
         + " <inputpath> <outputpath> <hbaseTable> <hbaseColumnFamily"
         + " \"<hbaseColumns (delimiter seperated)>\" <column delimiter>");*/
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    String hbaseTab = args[2];
    String hbaseColumnFamily = args[3];
    String hbaseColumns = args[4];
    String hbaseColumnSeperator = args[5];
    config.set(HBASEBulkLoadConstants.HBASE_TABLE_KEY, hbaseTab.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_FAMILY_KEY, hbaseColumnFamily);
    config.set(HBASEBulkLoadConstants.HBASE_COLUMNS_KEY, hbaseColumns.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_SEPERATOR_KEY, hbaseColumnSeperator);
    System.out.println(2);
    Job job = Job.getInstance(config, this.getClass().getName() + "-" + hbaseTab);
    HBaseConfiguration.addHbaseResources(config);

    job.setInputFormatClass(TextInputFormat.class);
    job.setJarByClass(HBASEBulkLoadDriver.class);
    job.setMapperClass(HBASEBulkLoadKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setCombinerClass(PutCombiner.class);
    job.setReducerClass(PutSortReducer.class);

    Connection connection = ConnectionFactory.createConnection(config);
    Table hTab = connection.getTable(TableName.valueOf(hbaseTab));

    FileSystem.get(getConf()).delete(new Path(args[1]), true);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    //job.setOutputFormatClass(HFileOutputFormat2.class);
    TableMapReduceUtil.initTableReducerJob(hTab.getName().getNameAsString(), null, job);
    //job.setNumReduceTasks(0);
    TableMapReduceUtil.addDependencyJars(job);
    HFileOutputFormat2.configureIncrementalLoadMap(job, hTab);

    int exitCode = job.waitForCompletion(true) ? HBASEBulkLoadConstants.SUCCESS
            : HBASEBulkLoadConstants.FAILURE;
    System.out.println(8);
    if (HBASEBulkLoadConstants.SUCCESS == exitCode) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
        loader.doBulkLoad(new Path(args[1]), (HTable) hTab);
        connection.close();
    }
    return exitCode;
}

From source file:com.cloudera.sa.securewordcount.SecureWordCountDriver.java

@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    args = new GenericOptionsParser(config, args).getRemainingArgs();

    if (args.length < 2) {

        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Job job = Job.getInstance(config, this.getClass().getName() + "-wordcount");
    job.setJarByClass(SecureWordCountDriver.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;

}

From source file:com.cloudera.test.UseHCat.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Get the input and output table names as arguments
    String inputTableName = args[0];
    String outputTableName = args[1];
    // Assume the default database
    String dbName = null;

    Job job = new Job(conf, "UseHCat");
    HCatInputFormat.setInput(job, dbName, inputTableName);
    job.setJarByClass(UseHCat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    // An HCatalog record as input
    job.setInputFormatClass(HCatInputFormat.class);

    // Mapper emits a string as key and an integer as value
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Ignore the key for the reducer output; emitting an HCatalog record as value
    job.setOutputKeyClass(WritableComparable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    job.setOutputFormatClass(HCatOutputFormat.class);

    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
    HCatSchema s = HCatOutputFormat.getTableSchema(job);
    System.err.println("INFO: output schema explicitly set for writing:" + s);
    HCatOutputFormat.setSchema(job, s);
    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.cloudera.traffic.AveragerRunner.java

License:Apache License

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Job job = new Job(conf);
    job.setJarByClass(AveragerRunner.class);
    job.setMapperClass(AveragerMapper.class);
    job.setReducerClass(AveragerReducer.class);
    job.setCombinerClass(AveragerReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(AverageWritable.class);
    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    job.waitForCompletion(true);
}