Example usage for org.apache.hadoop.mapreduce Job setJarByClass

Introduction

This page collects usage examples for org.apache.hadoop.mapreduce.Job.setJarByClass.

Prototype

public void setJarByClass(Class<?> cls) 

Document

Set the Jar by finding where a given class came from.
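
Before the project examples below, here is a minimal, self-contained sketch of the typical call pattern. The driver class SetJarByClassExample is hypothetical, and Hadoop's identity Mapper and Reducer stand in for real implementations; the point is that passing the driver class to setJarByClass lets Hadoop find the JAR that contains it and ship that JAR to the cluster.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetJarByClassExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setJarByClass example");

        // Hadoop inspects the classpath, finds the JAR containing
        // SetJarByClassExample, and ships that JAR to the task nodes.
        job.setJarByClass(SetJarByClassExample.class);

        // The identity mapper/reducer keep the sketch self-contained;
        // a real job would plug in its own Mapper and Reducer here.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Without some way of pointing the job at your JAR, tasks launched on remote nodes typically fail with ClassNotFoundException for the user's mapper and reducer classes, which is why nearly every driver in the examples below makes this call.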

Usage

From source file: com.daleway.training.hadoop.pagerank.PageRankSecondarySort.java

License: Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(PageRankSecondarySort.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    job.setMaxReduceAttempts(1);
    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}

From source file: com.daleway.training.hadoop.pagerank.PageRankSimple.java

License: Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(PageRankSimple.class);
    job.setMapperClass(PageRankMapper.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(PageRankReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}

From source file: com.datasalt.pangool.benchmark.secondarysort.HadoopSecondarySort.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Hadoop Secondary Sort");
    FileSystem fS = FileSystem.get(conf);
    fS.delete(new Path(otherArgs[1]), true);

    job.setJarByClass(HadoopSecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setPartitionerClass(KeyPartitioner.class);
    job.setGroupingComparatorClass(GroupingComparator.class);

    job.setMapOutputKeyClass(ComplexType.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
}

From source file: com.datasalt.pangool.benchmark.wordcount.HadoopWordCount.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    //conf.setBoolean("hadoop.security.authorization", false);
    //conf.set("hadoop.security.authentication","simple");
    Job job = new Job(conf, "word count");
    job.setJarByClass(HadoopWordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
}

From source file: com.datasalt.pangool.tuplemr.MapOnlyJobBuilder.java

License: Apache License

public Job createJob() throws IOException, TupleMRException, URISyntaxException {
    // perform a deep copy of the configuration
    this.conf = new Configuration(this.conf);

    String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
    try {
        InstancesDistributor.distribute(outputFormat, uniqueName, conf);
        instanceFilesCreated.add(uniqueName);
    } catch (URISyntaxException e1) {
        throw new TupleMRException(e1);
    }

    Job job;
    if (jobName == null) {
        job = new Job(conf);
    } else {
        job = new Job(conf, jobName);
    }
    job.setNumReduceTasks(0);

    job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
    job.setOutputFormatClass(ProxyOutputFormat.class);

    if (outputKeyClass == null) {
        throw new TupleMRException("Output spec must be defined, use setOutput()");
    }
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    FileOutputFormat.setOutputPath(job, outputPath);

    Input lastInput = null;

    for (Input input : multipleInputs.getMultiInputs()) {
        if (input.inputProcessor == null) {
            input.inputProcessor = mapOnlyMapper;
            if (input.inputProcessor == null) {
                throw new TupleMRException("Either mapOnlyMapper property or full Input spec must be set.");
            }
        }
        lastInput = input;
    }

    if (lastInput == null) {
        throw new TupleMRException("At least one input must be specified");
    }
    job.setJarByClass((jarByClass != null) ? jarByClass : lastInput.inputProcessor.getClass());

    instanceFilesCreated.addAll(multipleInputs.configureJob(job));
    instanceFilesCreated.addAll(namedOutputs.configureJob(job));

    return job;
}

From source file: com.datasalt.pangool.tuplemr.TupleMRBuilder.java

License: Apache License

public Job createJob() throws IOException, TupleMRException {

    failIfNull(tupleReducer, "Need to set a group handler");
    failIfEmpty(multipleInputs.getMultiInputs(), "Need to add at least one input");
    failIfNull(outputFormat, "Need to set output format");
    failIfNull(outputKeyClass, "Need to set outputKeyClass");
    failIfNull(outputValueClass, "Need to set outputValueClass");
    failIfNull(outputPath, "Need to set outputPath");

    // perform a deep copy of the Configuration
    this.conf = new Configuration(this.conf);

    TupleMRConfig tupleMRConf = buildConf();
    // Serialize PangoolConf in Hadoop Configuration
    instanceFilesCreated.addAll(TupleMRConfig.set(tupleMRConf, conf));
    Job job = (jobName == null) ? new Job(conf) : new Job(conf, jobName);
    if (tupleMRConf.getRollupFrom() != null) {
        job.setReducerClass(RollupReducer.class);
    } else {
        job.setReducerClass(SimpleReducer.class);
    }

    if (tupleCombiner != null) {
        job.setCombinerClass(SimpleCombiner.class); // not rollup by now
        // Set Combiner Handler
        String uniqueName = UUID.randomUUID().toString() + '.' + "combiner-handler.dat";
        try {
            InstancesDistributor.distribute(tupleCombiner, uniqueName, job.getConfiguration());
            instanceFilesCreated.add(uniqueName);
            job.getConfiguration().set(SimpleCombiner.CONF_COMBINER_HANDLER, uniqueName);
        } catch (URISyntaxException e1) {
            throw new TupleMRException(e1);
        }
    }

    // Set Tuple Reducer
    try {
        String uniqueName = UUID.randomUUID().toString() + '.' + "group-handler.dat";
        InstancesDistributor.distribute(tupleReducer, uniqueName, job.getConfiguration());
        instanceFilesCreated.add(uniqueName);
        job.getConfiguration().set(SimpleReducer.CONF_REDUCER_HANDLER, uniqueName);
    } catch (URISyntaxException e1) {
        throw new TupleMRException(e1);
    }

    // Enabling serialization
    TupleSerialization.enableSerialization(job.getConfiguration());

    job.setJarByClass((jarByClass != null) ? jarByClass : tupleReducer.getClass());
    job.setMapOutputKeyClass(DatumWrapper.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setPartitionerClass(TupleHashPartitioner.class);
    job.setGroupingComparatorClass(GroupComparator.class);
    job.setSortComparatorClass(SortComparator.class);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    FileOutputFormat.setOutputPath(job, outputPath);
    instanceFilesCreated.addAll(multipleInputs.configureJob(job));
    instanceFilesCreated.addAll(namedOutputs.configureJob(job));
    // Configure a {@link ProxyOutputFormat} for Pangool's Multiple Outputs to
    // work: {@link PangoolMultipleOutput}
    String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
    try {
        InstancesDistributor.distribute(outputFormat, uniqueName, conf);
        instanceFilesCreated.add(uniqueName);
    } catch (URISyntaxException e1) {
        throw new TupleMRException(e1);
    }
    job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
    job.setOutputFormatClass(ProxyOutputFormat.class);

    return job;
}

From source file: com.datasalt.utils.mapred.counter.MapRedCounter.java

License: Apache License

protected static Job buildMapRedCounterJobWithoutCombiner(String name,
        @SuppressWarnings("rawtypes") Class<? extends OutputFormat> outputFormat, String outPath,
        Configuration conf) throws IOException {

    Job job = new Job(conf, name);

    Path output = new Path(outPath);
    HadoopUtils.deleteIfExists(FileSystem.get(conf), output);
    job.setJarByClass(MapRedCounter.class);

    job.setReducerClass(MapRedCountReducer.class);
    job.setMapOutputKeyClass(CounterKey.class);
    job.setMapOutputValueClass(CounterValue.class);
    job.setOutputFormatClass(outputFormat);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    // Secondary sorting configuration.
    job.setGroupingComparatorClass(CounterKey.IdGroupComparator.class);
    job.setPartitionerClass(CounterKey.IdGroupPartitioner.class);

    FileOutputFormat.setOutputPath(job, output);

    String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
    try {
        DCUtils.serializeToDC(new HadoopOutputFormat(SequenceFileOutputFormat.class), uniqueName, conf);
        job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
        job.setOutputFormatClass(ProxyOutputFormat.class);
        // Multioutput configuration
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterKey.class, LongWritable.class);
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTDISTINCTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterDistinctKey.class,
                LongPairWritable.class);
    } catch (URISyntaxException e) {
        e.printStackTrace();
        throw new IOException(e);
    }
    return job;
}

From source file: com.datasalt.utils.mapred.counter.MapRedCounter.java

License: Apache License

/**
 * Adds an input file and a {@link MapRedCounterMapper} that emits the groups and items to be
 * counted. Remember that you have to provide your own {@link MapRedCounterMapper} implementation here.
 */
@SuppressWarnings({ "rawtypes" })
public static void addInput(Job job, Path location, Class<? extends InputFormat> inputFormat,
        Class<? extends MapRedCounterMapper> mapper) throws IOException {

    MultipleInputs.addInputPath(job, location, inputFormat, mapper);
    job.setJarByClass(mapper);
}

From source file: com.declum.squzer.example.hbase.table2file.Export.java

License: Apache License

/**
 * Sets up the actual job.
 * 
 * @param conf
 *            The current configuration.
 * @param args
 *            The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *             When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);

    Job job = Job.getInstance(conf);
    job.setJobName(tableName);
    job.setJobName(NAME + "_" + tableName);
    job.setJarByClass(Exporter.class);
    // TODO: Allow passing filter and subset of rows/columns.
    Scan s = new Scan();
    // Optional arguments.
    int versions = args.length > 2 ? Integer.parseInt(args[2]) : 1;
    s.setMaxVersions(versions);
    long startTime = args.length > 3 ? Long.parseLong(args[3]) : 0L;
    long endTime = args.length > 4 ? Long.parseLong(args[4]) : Long.MAX_VALUE;
    s.setTimeRange(startTime, endTime);
    s.setCacheBlocks(false);
    if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
        s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
    }
    LOG.info("versions=" + versions + ", starttime=" + startTime + ", endtime=" + endTime);
    TableMapReduceUtil.initTableMapperJob(tableName, s, Exporter.class, null, null, job);
    // No reducers. Just write straight to output files.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Result.class);
    FileOutputFormat.setOutputPath(job, outputDir);
    return job;
}

From source file: com.digitalpebble.behemoth.mahout.BehemothDocumentProcessor.java

License: Apache License

/**
 * Converts the input documents into token arrays using
 * {@link StringTuple}. The input documents have to be in
 * {@link org.apache.hadoop.io.SequenceFile} format.
 *
 * @param input
 *            input directory of the documents in
 *            {@link org.apache.hadoop.io.SequenceFile} format
 * @param output
 *            output directory where the {@link StringTuple} token array of
 *            each document has to be created
 * @param type
 *            The annotation type representing the tokens
 * @param feature
 *            The name of the features holding the token value
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void tokenizeDocuments(Path input, String type, String feature, Path output)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    // this conf parameter needs to be set to enable serialisation of
    // conf values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.set(TOKEN_TYPE, type);
    conf.set(FEATURE_NAME, feature);

    Job job = new Job(conf);
    job.setJobName("DocumentProcessor::BehemothTokenizer: input-folder: " + input);
    job.setJarByClass(BehemothDocumentProcessor.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StringTuple.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(BehemothTokenizerMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    HadoopUtil.delete(conf, output);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded)
        throw new IllegalStateException("Job failed!");
}