List of usage examples for org.apache.hadoop.mapreduce Job setInputFormatClass
public void setInputFormatClass(Class<? extends InputFormat> cls) throws IllegalStateException
From source file:com.neu.cs6240.Xml2csvComments.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); // Setting up the xml tag configurator for splitter conf.set("xmlinput.start", "<row "); conf.set("xmlinput.end", " />"); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: Xml2csvPosts <in> <out>"); System.exit(2);/*w ww . ja v a 2 s. c o m*/ } Job job = new Job(conf, "Converts Posts.xml to .csv"); job.setJarByClass(Xml2csvPosts.class); job.setInputFormatClass(XmlInputFormat.class); job.setMapperClass(CommentsMapper.class); job.setReducerClass(CommentsReducer.class); job.setPartitionerClass(PostsPartitioner.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); // Set as per your file size job.setNumReduceTasks(10); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.neu.cs6240.Xml2csvPosts.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); // Setting up the xml tag configurator for splitter conf.set("xmlinput.start", "<row "); conf.set("xmlinput.end", " />"); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: Xml2csvPosts <in> <out>"); System.exit(2);/*from ww w .j av a 2s . co m*/ } Job job = new Job(conf, "Converts Posts.xml to .csv"); job.setJarByClass(Xml2csvPosts.class); job.setInputFormatClass(XmlInputFormat.class); job.setMapperClass(PostsMapper.class); job.setReducerClass(PostsReducer.class); job.setPartitionerClass(PostsPartitioner.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); // Set as per your file size job.setNumReduceTasks(15); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.nikoo28.excel.mapreduce.ExcelDriver.java
License:Apache License
/**
 * Main entry point for the example.
 *
 * <p>Configures a map-only job (zero reduce tasks) that reads its input through
 * {@code ExcelInputFormat} and maps it with {@code ExcelMapper}, then waits for
 * the job to finish. The process exits with status 2 when fewer than two
 * arguments are given and with status 1 when the job does not succeed; on
 * success the method returns normally.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws Exception when something goes wrong
 */
public static void main(String[] args) throws Exception {
    logger.info("Driver started");

    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        System.err.println("Usage: ExcelDriver <in> <out>");
        System.exit(2);
    }

    Job job = new Job();
    job.setJarByClass(ExcelDriver.class);
    job.setJobName("Excel Record Reader");
    job.setMapperClass(ExcelMapper.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setInputFormatClass(ExcelInputFormat.class);

    // Surface a failed job through the exit status rather than discarding the result.
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }
}
From source file:com.nistfortunetellers.cleaning.NISTClean.java
License:Apache License
/** Runs a Job that is Text in and Out, and TextInput in and out, too! */ @SuppressWarnings({ "deprecation", "rawtypes" }) static void runTextJob(String jobName, Configuration jobConfig, String inputPath, String outputPath, Class<? extends Mapper> mapper, Class<? extends Reducer> reducer) { try {//from w w w . jav a2s . c o m Job genericJob = new Job(jobConfig, jobName); // DEBUG //genericJob.setNumReduceTasks(0); // END DEBUG genericJob.setJarByClass(NISTClean.class); genericJob.setOutputKeyClass(Text.class); genericJob.setOutputValueClass(Text.class); genericJob.setMapperClass(mapper); genericJob.setReducerClass(reducer); genericJob.setInputFormatClass(TextInputFormat.class); genericJob.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(genericJob, new Path(inputPath)); FileOutputFormat.setOutputPath(genericJob, new Path(outputPath)); genericJob.waitForCompletion(true); } catch (IOException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:com.pagerankcalculator.TwitterPageRank.java
/**
 * Graph parsing (job #1).
 *
 * <p>Ingests the raw data and initializes the page rank. Any existing output
 * path is deleted recursively before the job is started, and the call blocks
 * until the job finishes.
 *
 * @param in  input data file
 * @param out output directory
 * @return 0 when the job completes successfully, 1 otherwise
 * @throws IOException            propagated from job setup, file-system access or job execution
 * @throws InterruptedException   propagated from waiting for the job
 * @throws ClassNotFoundException propagated from waiting for the job
 */
public int parseGraph(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#1 Parsing Graph");
    job.setJarByClass(TwitterPageRank.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(GraphParsingMapper.class);
    job.setReducerClass(GraphParsingReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);
    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    // Ask the path for its own file system instead of FileSystem.newInstance(conf):
    // newInstance always builds a fresh instance for the default file system, which
    // was never closed here and is the wrong one when 'out' lives on another scheme.
    FileSystem fs = outputFilePath.getFileSystem(getConf());
    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.pagerankcalculator.TwitterPageRank.java
public int calculatePagerank(String in, String out, int iteration) throws IOException, InterruptedException, ClassNotFoundException { Job job = Job.getInstance(getConf()); job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#2 Iteration-" + iteration + " Calculating Page Rank"); job.setJarByClass(TwitterPageRank.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(PageRankCalculationMapper.class); job.setReducerClass(PageRankCalculationReducer.class); job.setInputFormatClass(TextInputFormat.class); job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS); LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); Path inputFilePath = new Path(in); Path outputFilePath = new Path(out); FileInputFormat.addInputPath(job, inputFilePath); FileOutputFormat.setOutputPath(job, outputFilePath); FileSystem fs = FileSystem.newInstance(getConf()); if (fs.exists(outputFilePath)) { fs.delete(outputFilePath, true); }//from w w w . j a v a 2s . c o m return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.pagerankcalculator.TwitterPageRank.java
public int sortPagerank(String in, String out) throws IOException, InterruptedException, ClassNotFoundException { Job job = Job.getInstance(getConf()); job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#3 Sorting Page Rank"); job.setJarByClass(TwitterPageRank.class); job.setMapOutputKeyClass(DoubleWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(PageRankSortingMapper.class); job.setReducerClass(PageRankSortingReducer.class); job.setInputFormatClass(TextInputFormat.class); job.setNumReduceTasks(1);// ww w. j a v a2 s . co m LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); job.setSortComparatorClass(DoubleSortDescComparator.class); Path inputFilePath = new Path(in); Path outputFilePath = new Path(out); FileInputFormat.addInputPath(job, inputFilePath); FileOutputFormat.setOutputPath(job, outputFilePath); FileSystem fs = FileSystem.newInstance(getConf()); if (fs.exists(outputFilePath)) { fs.delete(outputFilePath, true); } return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.peer2gear.nutch.xquery.ParseResult.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 2) { System.err.printf("Usage: %s [generic options] (<segment> ... | -dir <segments>) <output>\n", getClass().getSimpleName()); ToolRunner.printGenericCommandUsage(System.err); return -1; }//from w ww . j a va 2s . co m Job job = new Job(getConf()); for (int i = 0; i < args.length - 1; i++) { if ("-dir".equals(args[i])) { Path dir = new Path(args[++i]); FileSystem fs = dir.getFileSystem(getConf()); FileStatus[] fstats = fs.listStatus(dir, HadoopFSUtil.getPassDirectoriesFilter(fs)); Path[] segments = HadoopFSUtil.getPaths(fstats); for (Path segment : segments) { FileInputFormat.addInputPath(job, new Path(segment, ParseData.DIR_NAME)); } } else { FileInputFormat.addInputPath(job, new Path(args[i], ParseData.DIR_NAME)); } } FileOutputFormat.setOutputPath(job, new Path(args[args.length - 1])); job.setInputFormatClass(SequenceFileInputFormat.class); job.setMapperClass(GetResultMapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.phantom.hadoop.examples.BaileyBorweinPlouffe.java
License:Apache License
/** Create and setup a job */ private static Job createJob(String name, Configuration conf) throws IOException { final Job job = new Job(conf, NAME + "_" + name); final Configuration jobconf = job.getConfiguration(); job.setJarByClass(BaileyBorweinPlouffe.class); // setup mapper job.setMapperClass(BbpMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(BytesWritable.class); // setup reducer job.setReducerClass(BbpReducer.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(BytesWritable.class); job.setNumReduceTasks(1);/*from www . j av a2 s . co m*/ // setup input job.setInputFormatClass(BbpInputFormat.class); // disable task timeout jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0); // do not use speculative execution jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false); jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false); return job; }
From source file:com.phantom.hadoop.examples.Grep.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 3) { System.out.println("Grep <inDir> <outDir> <regex> [<group>]"); ToolRunner.printGenericCommandUsage(System.out); return 2; }/*from ww w.j a va 2s. co m*/ Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE))); Configuration conf = getConf(); conf.set(RegexMapper.PATTERN, args[2]); if (args.length == 4) conf.set(RegexMapper.GROUP, args[3]); Job grepJob = new Job(conf); try { grepJob.setJobName("grep-search"); FileInputFormat.setInputPaths(grepJob, args[0]); grepJob.setMapperClass(RegexMapper.class); grepJob.setCombinerClass(LongSumReducer.class); grepJob.setReducerClass(LongSumReducer.class); FileOutputFormat.setOutputPath(grepJob, tempDir); grepJob.setOutputFormatClass(SequenceFileOutputFormat.class); grepJob.setOutputKeyClass(Text.class); grepJob.setOutputValueClass(LongWritable.class); grepJob.waitForCompletion(true); Job sortJob = new Job(conf); sortJob.setJobName("grep-sort"); FileInputFormat.setInputPaths(sortJob, tempDir); sortJob.setInputFormatClass(SequenceFileInputFormat.class); sortJob.setMapperClass(InverseMapper.class); sortJob.setNumReduceTasks(1); // write a single file FileOutputFormat.setOutputPath(sortJob, new Path(args[1])); sortJob.setSortComparatorClass( // sort by decreasing freq LongWritable.DecreasingComparator.class); sortJob.waitForCompletion(true); } finally { FileSystem.get(conf).delete(tempDir, true); } return 0; }