Example usage for org.apache.hadoop.mapreduce Job setJarByClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setJarByClass.

Prototype

public void setJarByClass(Class<?> cls)

Source Link

Document

Set the Jar by finding where a given class came from.

Usage

From source file:com.github.ygf.pagerank.PageRank.java

License:Apache License

private void summarizeResults(int iter, Configuration conf, Path outputDir) throws Exception {

    // This job creates a plain text file with the top N PageRanks and the
    // titles of the pages. Each map task emits the top N PageRanks it
    // receives, and the reduce task merges the partial results into the
    // global top N PageRanks. A single reducer is used in the job in order
    // to have access to all the individual top N PageRanks from the
    // mappers. The reducer looks up the titles in the index built by
    // TitleIndex. This job was designed considering that N is small.

    int topResults = Integer.parseInt(conf.get("pagerank.top_results"));

    Job job = Job.getInstance(conf, "PageRank:TopN");

    job.setJarByClass(PageRank.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(PageRankTopNMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(PageRankTopNReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(outputDir, "v" + iter));
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "v" + iter + "-top" + topResults));

    job.setNumReduceTasks(1);/*from w ww  . j  ava  2  s  .  c  o  m*/
    job.waitForCompletion(true);
}

From source file:com.github.ygf.pagerank.TitleIndex.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("Usage: TitleIndex <titles-sorted.txt> <output-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }//from  w w w  .  j a v a2 s  .c  om

    Path titlesFile = new Path(args[0]);
    Path outputDir = new Path(args[1]);

    Configuration conf = getConf();

    // Do not create _SUCCESS files. MapFileOutputFormat.getReaders calls
    // try to read the _SUCCESS as another MapFile dir.
    conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "false");

    // This job creates a MapFile of the titles indexed by the page id.
    // UnsplittableTextInputFormat is used to ensure that the same map task
    // gets all the lines in the titlesFile and it can count the line
    // numbers. The number of reduce tasks is set to 0.

    Job job = Job.getInstance(conf, "TitleIndex");

    job.setJarByClass(InLinks.class);
    job.setInputFormatClass(UnsplittableTextInputFormat.class);
    job.setMapperClass(TitleIndexMapper.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, titlesFile);
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setNumReduceTasks(0);
    job.waitForCompletion(true);

    return 0;
}

From source file:com.goldsaxfoundation.bigdata.Module5.SimpleMapReduce.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(SimpleMapReduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);/*from   w  w w.  java2 s  . c  o  m*/
}

From source file:com.google.cloud.bigtable.mapreduce.Export.java

License:Apache License

/**
 * Sets up the actual job.//from w  w  w .  ja v a 2s  . c  om
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws java.io.IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    conf.setIfUnset("hbase.client.connection.impl", BigtableConfiguration.getConnectionClass().getName());
    conf.setIfUnset(BigtableOptionsFactory.BIGTABLE_RPC_TIMEOUT_MS_KEY, "60000");
    conf.setBoolean(TableInputFormat.SHUFFLE_MAPS, true);

    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    Job job = Job.getInstance(conf, NAME + "_" + tableName);
    job.setJobName(NAME + "_" + tableName);
    job.setJarByClass(Export.class);
    // Set optional scan parameters
    Scan s = getConfiguredScanForJob(conf, args);
    TableMapReduceUtil.initTableMapperJob(tableName, s, IdentityTableMapper.class, ImmutableBytesWritable.class,
            Result.class, job, false);
    // No reducers.  Just write straight to output files.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Result.class);
    FileOutputFormat.setOutputPath(job, outputDir); // job conf doesn't contain the conf so doesn't have a default fs.
    return job;
}

From source file:com.google.cloud.bigtable.mapreduce.Import.java

License:Open Source License

/**
 * Sets up the actual job./*  w  ww.  j a  v a2  s.  c  om*/
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    TableName tableName = TableName.valueOf(args[0]);
    conf.set(TABLE_NAME, tableName.getNameAsString());
    Path inputDir = new Path(args[1]);
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(Importer.class);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);

    // make sure we get the filter in the jars
    try {
        Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
        if (filter != null) {
            TableMapReduceUtil.addDependencyJars(conf, filter);
        }
    } catch (Exception e) {
        throw new IOException(e);
    }

    if (hfileOutPath != null) {
        job.setMapperClass(KeyValueImporter.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
                Table table = conn.getTable(tableName);
                RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            job.setReducerClass(KeyValueSortReducer.class);
            Path outputDir = new Path(hfileOutPath);
            FileOutputFormat.setOutputPath(job, outputDir);
            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            job.setMapOutputValueClass(KeyValue.class);
            HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                    com.google.common.base.Preconditions.class);
        }
    } else {
        // No reducers.  Just write straight to table.  Call initTableReducerJob
        // because it sets up the TableOutputFormat.
        job.setMapperClass(Importer.class);
        TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
        job.setNumReduceTasks(0);
    }
    return job;
}

From source file:com.gsinnovations.howdah.AbstractJob.java

License:Apache License

protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {

    Job job = new Job(new Configuration(getConf()));
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }/*w  w w . jav  a2  s  .c o m*/
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setJobName(getCustomJobName(job, mapper, reducer));

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}

From source file:com.gsinnovations.howdah.Driver.java

License:Apache License

public static void job(Path input, Path output, int numReduceTasks)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();

    Job job = new Job(conf);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(TikaMapper.class);
    //job.setCombinerClass(KMeansCombiner.class);
    //job.setReducerClass(KMeansReducer.class);
    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setJarByClass(Driver.class);
    HadoopUtil.overwriteOutput(output);//from  www . j a v  a2  s.  co  m
    job.waitForCompletion(true);

}

From source file:com.gsvic.csmr.CSMRBase.java

License:Apache License

public static void generatePairs(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();
    path = out;//from w  w  w . j  ava  2s.c  o m
    Job job;
    Path input, output;
    input = new Path(in);
    output = new Path(path + "/CSMRPairs");

    job = new Job(conf);
    job.setJobName("CSMR Pairs Job");
    job.setJarByClass(CSMRBase.class);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(CSMRMapper.class);
    job.setReducerClass(CSMRReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DocumentWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorArrayWritable.class);

    job.waitForCompletion(true);
}

From source file:com.gsvic.csmr.CSMRBase.java

License:Apache License

public static void StartCSMR() throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();
    Job job;
    job = new Job(conf);
    job.setJobName("CSMR Cosine Similarity Job");
    job.setJarByClass(CSMRBase.class);

    FileInputFormat.addInputPath(job, new Path(path + "/CSMRPairs/part-r-00000"));
    FileOutputFormat.setOutputPath(job, new Path(path + "/Results"));
    job.setMapperClass(Mapper.class);
    job.setReducerClass(CosineSimilarityReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VectorArrayWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    System.exit(job.waitForCompletion(true) ? 1 : 0);

}

From source file:com.hadoop.examples.secondSort.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    // ?hadoop?/*from w  w w .  j a  v  a  2  s . c o  m*/
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    // ?
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    // Mapper
    job.setMapperClass(MapClass.class);
    // ???CombinerCombiner<Text, IntWritable>Reduce<IntPair, IntWritable>?
    //job.setCombinerClass(Reduce.class);

    // Reducer
    job.setReducerClass(Reduce.class);

    // *
    // *group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    //setSortComparatorClass()hadoopkey?(?2.Hadoopkey?)
    //IntPair?compareTo()
    //job.setSortComparatorClass(cls);
    // *
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // map Key
    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    // mapValue
    job.setMapOutputValueClass(IntWritable.class);

    // rduceKeyTextOutputFormatClassTextOutputFormat
    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    // rduceValue
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    // ??job
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}