Example usage for org.apache.hadoop.mapreduce Job setInputFormatClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setInputFormatClass.

Prototype

public void setInputFormatClass(Class<? extends InputFormat> cls) throws IllegalStateException

Source Link

Document

Set the InputFormat for the job.

Usage

From source file:accismus.benchmark.Verifier.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println("Usage : " + this.getClass().getSimpleName() + " <props file> <output dir>");
        return 1;
    }/*from w  w w  . ja  v a2 s. co  m*/

    Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());

    AccismusProperties accisumusProps = new AccismusProperties(new File(args[0]));

    AccismusInputFormat.configure(job, accisumusProps);
    AccismusInputFormat.fetchFamilies(job, KEY, DUP);

    job.setInputFormatClass(AccismusInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(VMapper.class);

    job.setReducerClass(VReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);

    TextOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);

    return 0;
}

From source file:adts.ContainingArticle.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "ContainingArticle");
    job.setJarByClass(ContainingArticle.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path queriesInputPath = new Path(args[0]);
    Path articlesInputPath = new Path(args[1]);
    MultipleInputs.addInputPath(job, queriesInputPath, TextInputFormat.class, QueriesMap.class);
    MultipleInputs.addInputPath(job, articlesInputPath, TextInputFormat.class, ArticlesMap.class);

    FileOutputFormat.setOutputPath(job, new Path("/root/temporary"));
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.waitForCompletion(true);//from w  ww . j  a va2s.com

    Job collectingJob = new Job(conf, "ContainingArticle");
    collectingJob.setJarByClass(ContainingArticle.class);

    collectingJob.setOutputKeyClass(IntWritable.class);
    collectingJob.setOutputValueClass(Text.class);

    collectingJob.setMapperClass(CollectMap.class);
    collectingJob.setReducerClass(CollectReduce.class);

    collectingJob.setInputFormatClass(TextInputFormat.class);
    collectingJob.setOutputFormatClass(TextOutputFormat.class);
    collectingJob.setMapOutputKeyClass(Text.class);
    collectingJob.setMapOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(collectingJob, new Path("/root/temporary"));
    FileOutputFormat.setOutputPath(collectingJob, new Path("/root/temporary2"));

    collectingJob.waitForCompletion(true);

    Job countingJob = new Job(conf, "ContainingArticle");
    countingJob.setJarByClass(ContainingArticle.class);

    countingJob.setOutputKeyClass(Text.class);
    countingJob.setOutputValueClass(IntWritable.class);

    countingJob.setMapperClass(CountMap.class);
    countingJob.setReducerClass(CountReduce.class);

    countingJob.setInputFormatClass(TextInputFormat.class);
    countingJob.setOutputFormatClass(TextOutputFormat.class);
    countingJob.setMapOutputKeyClass(IntWritable.class);
    countingJob.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(countingJob, new Path("/root/temporary2"));
    FileOutputFormat.setOutputPath(countingJob, new Path(args[2]));

    countingJob.waitForCompletion(true);
}

From source file:adts.PopularKeywords.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "PopularKeywords");
    job.setJarByClass(PopularKeywords.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path queriesInputPath = new Path(args[0]);
    Path StopWordsInputPath = new Path(args[1]);
    MultipleInputs.addInputPath(job, queriesInputPath, TextInputFormat.class, Map.class);
    MultipleInputs.addInputPath(job, StopWordsInputPath, TextInputFormat.class, StopwordsMap.class);

    FileOutputFormat.setOutputPath(job, new Path("/root/temporary"));

    job.waitForCompletion(true);//  w  w  w  .ja v  a  2  s  . co m

    Job sortingJob = new Job(conf, "PopularKeywords");
    sortingJob.setJarByClass(PopularKeywords.class);

    sortingJob.setOutputKeyClass(Text.class);
    sortingJob.setOutputValueClass(LongWritable.class);

    sortingJob.setMapperClass(ReverseMap.class);
    sortingJob.setReducerClass(ReverseReduce.class);

    sortingJob.setInputFormatClass(TextInputFormat.class);
    sortingJob.setOutputFormatClass(TextOutputFormat.class);
    sortingJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    sortingJob.setMapOutputKeyClass(LongWritable.class);
    sortingJob.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(sortingJob, new Path("/root/temporary"));
    FileOutputFormat.setOutputPath(sortingJob, new Path(args[2]));

    sortingJob.setNumReduceTasks(1);
    sortingJob.waitForCompletion(true);
}

From source file:adts.PrepareInput.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "PrepareInput");
    job.setJarByClass(PrepareInput.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setNumReduceTasks(1);/*from w  w w .j  a  v a 2s  . com*/
    job.waitForCompletion(true);
}

From source file:adts.SuccessfullQueries.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "SuccessfullQueries");
    job.setJarByClass(SuccessfullQueries.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);//from  w w w. ja  v  a2s .  c o m
}

From source file:alluxio.checker.MapReduceIntegrationChecker.java

License:Apache License

/**
 * Implements MapReduce with Alluxio integration checker.
 *
 * @return 0 for success, 2 for unable to find Alluxio classes, 1 otherwise
 *//*w w  w  .  ja v  a  2s  .  c o  m*/
private int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String numMaps = new GenericOptionsParser(conf, args).getRemainingArgs()[0];
    conf.set(MRJobConfig.NUM_MAPS, numMaps);
    createHdfsFilesystem(conf);

    Job job = Job.getInstance(conf, "MapReduceIntegrationChecker");
    job.setJarByClass(MapReduceIntegrationChecker.class);
    job.setMapperClass(CheckerMapper.class);
    job.setCombinerClass(CheckerReducer.class);
    job.setReducerClass(CheckerReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(EmptyInputFormat.class);
    FileOutputFormat.setOutputPath(job, mOutputFilePath);

    try {
        if (!job.waitForCompletion(true)) {
            return 1;
        }
        Status resultStatus = generateReport();
        return resultStatus.equals(Status.SUCCESS) ? 0
                : (resultStatus.equals(Status.FAIL_TO_FIND_CLASS) ? 2 : 1);
    } finally {
        if (mFileSystem.exists(mOutputFilePath)) {
            mFileSystem.delete(mOutputFilePath, true);
        }
        mFileSystem.close();
    }
}

From source file:alluxio.examples.keyvalue.hadoop.CloneStoreMapReduce.java

License:Apache License

/**
 * @param args two parameters, the first is the input key-value store path, the second is the
 *    output key-value store path//from ww  w . j  a  va  2 s .  co  m
 * @throws Exception if any exception happens
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // NOTE(binfan): we are using the deprecated constructor of Job instance to compile with
    // hadoop-1.0. If this is not a concern, a better way is
    //     Job job = Job.getInstance(conf);
    Job job = new Job(conf);

    job.setJobName("CloneStoreMapReduce");
    job.setJarByClass(CloneStoreMapReduce.class);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setMapperClass(CloneStoreMapper.class);
    job.setReducerClass(CloneStoreReducer.class);

    job.setInputFormatClass(KeyValueInputFormat.class);
    job.setOutputFormatClass(KeyValueOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:at.illecker.hama.rootbeer.examples.util.RandomGraphGenerator.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 4) {
        System.out.println(//from  ww  w. j av  a2 s. c om
                "USAGE: <Number of vertices> <Number of edges per vertex> <Number of partitions> <Outpath>");
        return;
    }
    System.out.println(Arrays.toString(args));
    Configuration conf = new Configuration();
    conf.setInt("hama.num.vertices", Integer.parseInt(args[0]));
    conf.setInt("hama.num.partitions", Integer.parseInt(args[2]));
    conf.setInt("number.edges", Integer.parseInt(args[1]));
    Job job = new Job(conf);

    Path generated = new Path(new Path(args[3]).getParent(), "generated");
    FileOutputFormat.setOutputPath(job, generated);
    FileSystem.get(conf).delete(generated, true);

    job.setJobName("RangeWriter");

    job.setJarByClass(SortGenMapper.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    job.setInputFormatClass(RangeInputFormat.class);

    job.waitForCompletion(true);
    conf.setInt("max.id", Integer.valueOf(args[0]));
    job = new Job(conf);

    FileOutputFormat.setOutputPath(job, new Path(args[3]));
    FileSystem.get(conf).delete(new Path(args[3]), true);

    job.setJobName("Random Vertex Writer");

    FileInputFormat.addInputPath(job, generated);

    job.setJarByClass(RandomMapper.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setNumReduceTasks(conf.getInt("hama.num.partitions", 2));
    job.setPartitionerClass(HashPartitioner.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.waitForCompletion(true);
}

From source file:AverageProj.AveragePrice.java

public int run(String[] args) throws Exception, ClassNotFoundException {
    Configuration conf = getConf();
    Job job = new Job(conf, "Avg");
    job.setJarByClass(AveragePrice.class);
    //final File f = new File(AveragePrice.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setMapperClass(AvMapper.class);
    job.setMapOutputKeyClass(YearPrice.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setGroupingComparatorClass(YearSymComparator.class);
    // job.setCombinerClass(AvReducer.class);
    job.setReducerClass(AvReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(YearPrice.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setPartitionerClass(AvgPartitioner.class);
    job.setNumReduceTasks(7);/* w  ww.j a va2s .co m*/
    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}

From source file:avro.mr.MapReduceAvroWordCount.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: AvroWordCount <input path> <output path>");
        return -1;
    }/*  w  w  w  .j a  va  2s . c  o  m*/

    Job job = Job.getInstance(getConf());
    job.setJarByClass(MapReduceAvroWordCount.class);
    job.setJobName("wordcount");

    // We call setOutputSchema first so we can override the configuration
    // parameters it sets
    AvroJob.setOutputKeySchema(job, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT)));
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setSortComparatorClass(Text.Comparator.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);

    return 0;
}