List of usage examples for org.apache.hadoop.mapreduce Job setInputFormatClass
public void setInputFormatClass(Class<? extends InputFormat> cls) throws IllegalStateException
From source file:accismus.benchmark.Verifier.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage : " + this.getClass().getSimpleName() + " <props file> <output dir>"); return 1; }/*from w w w . ja v a2 s. co m*/ Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis()); job.setJarByClass(this.getClass()); AccismusProperties accisumusProps = new AccismusProperties(new File(args[0])); AccismusInputFormat.configure(job, accisumusProps); AccismusInputFormat.fetchFamilies(job, KEY, DUP); job.setInputFormatClass(AccismusInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setMapperClass(VMapper.class); job.setReducerClass(VReducer.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); return 0; }
From source file:adts.ContainingArticle.java
License:Open Source License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "ContainingArticle"); job.setJarByClass(ContainingArticle.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); Path queriesInputPath = new Path(args[0]); Path articlesInputPath = new Path(args[1]); MultipleInputs.addInputPath(job, queriesInputPath, TextInputFormat.class, QueriesMap.class); MultipleInputs.addInputPath(job, articlesInputPath, TextInputFormat.class, ArticlesMap.class); FileOutputFormat.setOutputPath(job, new Path("/root/temporary")); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.waitForCompletion(true);//from w ww . j a va2s.com Job collectingJob = new Job(conf, "ContainingArticle"); collectingJob.setJarByClass(ContainingArticle.class); collectingJob.setOutputKeyClass(IntWritable.class); collectingJob.setOutputValueClass(Text.class); collectingJob.setMapperClass(CollectMap.class); collectingJob.setReducerClass(CollectReduce.class); collectingJob.setInputFormatClass(TextInputFormat.class); collectingJob.setOutputFormatClass(TextOutputFormat.class); collectingJob.setMapOutputKeyClass(Text.class); collectingJob.setMapOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(collectingJob, new Path("/root/temporary")); FileOutputFormat.setOutputPath(collectingJob, new Path("/root/temporary2")); collectingJob.waitForCompletion(true); Job countingJob = new Job(conf, "ContainingArticle"); countingJob.setJarByClass(ContainingArticle.class); countingJob.setOutputKeyClass(Text.class); countingJob.setOutputValueClass(IntWritable.class); countingJob.setMapperClass(CountMap.class); countingJob.setReducerClass(CountReduce.class); countingJob.setInputFormatClass(TextInputFormat.class); countingJob.setOutputFormatClass(TextOutputFormat.class); countingJob.setMapOutputKeyClass(IntWritable.class); countingJob.setMapOutputValueClass(Text.class); FileInputFormat.addInputPath(countingJob, new Path("/root/temporary2")); FileOutputFormat.setOutputPath(countingJob, new Path(args[2])); countingJob.waitForCompletion(true); }
From source file:adts.PopularKeywords.java
License:Open Source License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "PopularKeywords"); job.setJarByClass(PopularKeywords.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); Path queriesInputPath = new Path(args[0]); Path StopWordsInputPath = new Path(args[1]); MultipleInputs.addInputPath(job, queriesInputPath, TextInputFormat.class, Map.class); MultipleInputs.addInputPath(job, StopWordsInputPath, TextInputFormat.class, StopwordsMap.class); FileOutputFormat.setOutputPath(job, new Path("/root/temporary")); job.waitForCompletion(true);// w w w .ja v a 2 s . co m Job sortingJob = new Job(conf, "PopularKeywords"); sortingJob.setJarByClass(PopularKeywords.class); sortingJob.setOutputKeyClass(Text.class); sortingJob.setOutputValueClass(LongWritable.class); sortingJob.setMapperClass(ReverseMap.class); sortingJob.setReducerClass(ReverseReduce.class); sortingJob.setInputFormatClass(TextInputFormat.class); sortingJob.setOutputFormatClass(TextOutputFormat.class); sortingJob.setSortComparatorClass(LongWritable.DecreasingComparator.class); sortingJob.setMapOutputKeyClass(LongWritable.class); sortingJob.setMapOutputValueClass(Text.class); FileInputFormat.addInputPath(sortingJob, new Path("/root/temporary")); FileOutputFormat.setOutputPath(sortingJob, new Path(args[2])); sortingJob.setNumReduceTasks(1); sortingJob.waitForCompletion(true); }
From source file:adts.PrepareInput.java
License:Open Source License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "PrepareInput"); job.setJarByClass(PrepareInput.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setNumReduceTasks(1);/*from w w w .j a v a 2s . com*/ job.waitForCompletion(true); }
From source file:adts.SuccessfullQueries.java
License:Open Source License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "SuccessfullQueries"); job.setJarByClass(SuccessfullQueries.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true);//from w w w. ja v a2s . c o m }
From source file:alluxio.checker.MapReduceIntegrationChecker.java
License:Apache License
/** * Implements MapReduce with Alluxio integration checker. * * @return 0 for success, 2 for unable to find Alluxio classes, 1 otherwise *//*w w w . ja v a 2s . c o m*/ private int run(String[] args) throws Exception { Configuration conf = new Configuration(); String numMaps = new GenericOptionsParser(conf, args).getRemainingArgs()[0]; conf.set(MRJobConfig.NUM_MAPS, numMaps); createHdfsFilesystem(conf); Job job = Job.getInstance(conf, "MapReduceIntegrationChecker"); job.setJarByClass(MapReduceIntegrationChecker.class); job.setMapperClass(CheckerMapper.class); job.setCombinerClass(CheckerReducer.class); job.setReducerClass(CheckerReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(EmptyInputFormat.class); FileOutputFormat.setOutputPath(job, mOutputFilePath); try { if (!job.waitForCompletion(true)) { return 1; } Status resultStatus = generateReport(); return resultStatus.equals(Status.SUCCESS) ? 0 : (resultStatus.equals(Status.FAIL_TO_FIND_CLASS) ? 2 : 1); } finally { if (mFileSystem.exists(mOutputFilePath)) { mFileSystem.delete(mOutputFilePath, true); } mFileSystem.close(); } }
From source file:alluxio.examples.keyvalue.hadoop.CloneStoreMapReduce.java
License:Apache License
/** * @param args two parameters, the first is the input key-value store path, the second is the * output key-value store path//from ww w . j a va 2 s . co m * @throws Exception if any exception happens */ public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); // NOTE(binfan): we are using the deprecated constructor of Job instance to compile with // hadoop-1.0. If this is not a concern, a better way is // Job job = Job.getInstance(conf); Job job = new Job(conf); job.setJobName("CloneStoreMapReduce"); job.setJarByClass(CloneStoreMapReduce.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); job.setMapperClass(CloneStoreMapper.class); job.setReducerClass(CloneStoreReducer.class); job.setInputFormatClass(KeyValueInputFormat.class); job.setOutputFormatClass(KeyValueOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:at.illecker.hama.rootbeer.examples.util.RandomGraphGenerator.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 4) { System.out.println(//from ww w. j av a2 s. c om "USAGE: <Number of vertices> <Number of edges per vertex> <Number of partitions> <Outpath>"); return; } System.out.println(Arrays.toString(args)); Configuration conf = new Configuration(); conf.setInt("hama.num.vertices", Integer.parseInt(args[0])); conf.setInt("hama.num.partitions", Integer.parseInt(args[2])); conf.setInt("number.edges", Integer.parseInt(args[1])); Job job = new Job(conf); Path generated = new Path(new Path(args[3]).getParent(), "generated"); FileOutputFormat.setOutputPath(job, generated); FileSystem.get(conf).delete(generated, true); job.setJobName("RangeWriter"); job.setJarByClass(SortGenMapper.class); job.setMapperClass(SortGenMapper.class); job.setNumReduceTasks(0); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); job.setInputFormatClass(RangeInputFormat.class); job.waitForCompletion(true); conf.setInt("max.id", Integer.valueOf(args[0])); job = new Job(conf); FileOutputFormat.setOutputPath(job, new Path(args[3])); FileSystem.get(conf).delete(new Path(args[3]), true); job.setJobName("Random Vertex Writer"); FileInputFormat.addInputPath(job, generated); job.setJarByClass(RandomMapper.class); job.setMapperClass(RandomMapper.class); job.setReducerClass(Reducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setNumReduceTasks(conf.getInt("hama.num.partitions", 2)); job.setPartitionerClass(HashPartitioner.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.waitForCompletion(true); }
From source file:AverageProj.AveragePrice.java
public int run(String[] args) throws Exception, ClassNotFoundException { Configuration conf = getConf(); Job job = new Job(conf, "Avg"); job.setJarByClass(AveragePrice.class); //final File f = new File(AveragePrice.class.getProtectionDomain().getCodeSource().getLocation().getPath()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setMapperClass(AvMapper.class); job.setMapOutputKeyClass(YearPrice.class); job.setMapOutputValueClass(DoubleWritable.class); job.setGroupingComparatorClass(YearSymComparator.class); // job.setCombinerClass(AvReducer.class); job.setReducerClass(AvReducer.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(YearPrice.class); job.setOutputValueClass(DoubleWritable.class); job.setPartitionerClass(AvgPartitioner.class); job.setNumReduceTasks(7);/* w ww.j a va2s .co m*/ System.exit(job.waitForCompletion(true) ? 0 : 1); return 0; }
From source file:avro.mr.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: AvroWordCount <input path> <output path>"); return -1; }/* w w w .j a va 2s . c o m*/ Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceAvroWordCount.class); job.setJobName("wordcount"); // We call setOutputSchema first so we can override the configuration // parameters it sets AvroJob.setOutputKeySchema(job, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT))); job.setOutputValueClass(NullWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setSortComparatorClass(Text.Comparator.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); return 0; }