List of usage examples for org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPaths
public static void addInputPaths(Job job, String commaSeparatedPaths) throws IOException
From source file:a.b.c.MultiFileWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 2) { printUsage();//w w w.ja va 2 s.c om return 2; } Job job = new Job(getConf()); job.setJobName("MultiFileWordCount"); job.setJarByClass(MultiFileWordCount.class); //set the InputFormat of the job to our InputFormat job.setInputFormatClass(MyInputFormat.class); // the keys are words (strings) job.setOutputKeyClass(Text.class); // the values are counts (ints) job.setOutputValueClass(IntWritable.class); //use the defined mapper job.setMapperClass(MapClass.class); //use the WordCount Reducer job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); FileInputFormat.addInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.ifeng.logparser.NginxLogDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName()); ToolRunner.printGenericCommandUsage(System.err); return -1; }//from w w w . ja v a2s . co m Job job = Job.getInstance(super.getConf()); FileInputFormat.setInputDirRecursive(job, true); //FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); FileInputFormat.addInputPaths(job, args[0]); job.setMapperClass(NginxLogMapper.class); job.setReducerClass(NginxLogReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.metamx.druid.indexer.path.StaticPathSpec.java
License:Open Source License
@Override public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException { log.info("Adding paths[%s]", paths); FileInputFormat.addInputPaths(job, paths); return job;// w w w .ja v a 2 s .co m }
From source file:com.phantom.hadoop.examples.MultiFileWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 2) { printUsage();// www . j ava 2 s. c om return 2; } Job job = new Job(getConf()); job.setJobName("MultiFileWordCount"); job.setJarByClass(MultiFileWordCount.class); // set the InputFormat of the job to our InputFormat job.setInputFormatClass(MyInputFormat.class); // the keys are words (strings) job.setOutputKeyClass(Text.class); // the values are counts (ints) job.setOutputValueClass(IntWritable.class); // use the defined mapper job.setMapperClass(MapClass.class); // use the WordCount Reducer job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); FileInputFormat.addInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.yourcompany.hadoop.mapreduce.aggregate.UnionDriver.java
License:Apache License
private void parseArguements(String[] args, Job job) throws IOException { for (int i = 0; i < args.length; ++i) { if ("-input".equals(args[i])) { FileInputFormat.addInputPaths(job, args[++i]); } else if ("-output".equals(args[i])) { FileOutputFormat.setOutputPath(job, new Path(args[++i])); }// w ww . j av a2s.co m } }
From source file:com.yourcompany.hadoop.mapreduce.hcatalog.HCatalogExampleDriver.java
License:Apache License
private void parseArguements(String[] args, Job job) throws IOException { String outputTableName = null; String dbName = null;//from w w w . ja v a2s .co m String inputTableName = null; for (int i = 0; i < args.length; ++i) { if ("-input".equals(args[i])) { FileInputFormat.addInputPaths(job, args[++i]); } else if ("-output".equals(args[i])) { FileOutputFormat.setOutputPath(job, new Path(args[++i])); } else if ("-dbName".equals(args[i])) { dbName = args[++i]; } else if ("-inputTableName".equals(args[i])) { inputTableName = args[++i]; } else if ("-outputTableName".equals(args[i])) { outputTableName = args[++i]; } } HCatInputFormat.setInput(job.getConfiguration(), dbName, inputTableName); HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null)); HCatSchema s = HCatOutputFormat.getTableSchema(job.getConfiguration()); HCatOutputFormat.setSchema(job, s); }
From source file:com.yourcompany.hadoop.mapreduce.KoreanWordcountDriver.java
License:Apache License
/**
 * Applies the command-line options to the job.
 *
 * <p>{@code -input} and {@code -output} set the job's input paths and output path;
 * {@code -reducer} sets the number of reduce tasks (parsed as an integer);
 * {@code -exactMatch}, {@code -bigrammable}, {@code -hasOrigin}, {@code -originCNoun}
 * and {@code -minSupport} are copied into the job configuration under the option name
 * without the leading dash. Each option consumes the element that follows it;
 * unrecognised elements are skipped.
 *
 * @param args command-line arguments to scan
 * @param job  the job being configured
 * @throws IOException if adding the input paths or setting the output path fails
 */
private void parseArguements(String[] args, Job job) throws IOException {
    int cursor = 0;
    while (cursor < args.length) {
        final String option = args[cursor];

        if ("-input".equals(option)) {
            FileInputFormat.addInputPaths(job, args[++cursor]);
        } else if ("-output".equals(option)) {
            FileOutputFormat.setOutputPath(job, new Path(args[++cursor]));
        } else if ("-reducer".equals(option)) {
            job.setNumReduceTasks(Integer.parseInt(args[++cursor]));
        } else if ("-exactMatch".equals(option)) {
            job.getConfiguration().set("exactMatch", args[++cursor]);
        } else if ("-bigrammable".equals(option)) {
            job.getConfiguration().set("bigrammable", args[++cursor]);
        } else if ("-hasOrigin".equals(option)) {
            job.getConfiguration().set("hasOrigin", args[++cursor]);
        } else if ("-originCNoun".equals(option)) {
            job.getConfiguration().set("originCNoun", args[++cursor]);
        } else if ("-minSupport".equals(option)) {
            job.getConfiguration().set("minSupport", args[++cursor]);
        }

        cursor++;
    }
}
From source file:com.yourcompany.hadoop.mapreduce.lexical.LexicalAnalyzerDriver.java
License:Apache License
/**
 * Applies the command-line options to the job.
 *
 * <p>{@code -input} and {@code -output} set the job's input paths and output path,
 * {@code -indexmode} is stored in the job configuration under the key
 * {@code "indexmode"}, and {@code -reducer} sets the number of reduce tasks
 * (parsed as an integer). Each option consumes the element that follows it;
 * unrecognised elements are skipped.
 *
 * @param args command-line arguments to scan
 * @param job  the job being configured
 * @throws IOException if adding the input paths or setting the output path fails
 */
private void parseArguements(String[] args, Job job) throws IOException {
    for (int idx = 0; idx < args.length; idx++) {
        final String flag = args[idx];

        if ("-input".equals(flag)) {
            FileInputFormat.addInputPaths(job, args[++idx]);
            continue;
        }
        if ("-output".equals(flag)) {
            FileOutputFormat.setOutputPath(job, new Path(args[++idx]));
            continue;
        }
        if ("-indexmode".equals(flag)) {
            job.getConfiguration().set("indexmode", args[++idx]);
            continue;
        }
        if ("-reducer".equals(flag)) {
            job.setNumReduceTasks(Integer.parseInt(args[++idx]));
        }
    }
}
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.ConfigurationHelper.java
License:Apache License
/** * Job configurator/*from ww w . j a va 2 s . co m*/ * * @param job job instance * @param jarByClass class of the jar * @param mapperClass mapper * @param reducerClass reducer * @param commaSeparatedInputFiles input paths * @param outputPath output * @throws IOException I/O exception */ public static void configureJob(Job job, Class<?> jarByClass, Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass, String commaSeparatedInputFiles, String outputPath) throws IOException { job.setJarByClass(jarByClass); job.setJobName(jarByClass.getName()); // mapper job.setMapperClass(mapperClass); // reducer job.setReducerClass(reducerClass); // input-output is warc job.setInputFormatClass(WARCInputFormat.class); // prevent producing empty files LazyOutputFormat.setOutputFormatClass(job, WARCOutputFormat.class); // intermediate data job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(WARCWritable.class); // output data job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(WARCWritable.class); // set output compression to GZip FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); FileInputFormat.addInputPaths(job, commaSeparatedInputFiles); FileOutputFormat.setOutputPath(job, new Path(outputPath)); }
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.examples.SimpleTextSearch.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); Job job = Job.getInstance();// ww w . ja v a 2s. c om job.setJarByClass(SimpleTextSearch.class); job.setJobName(SimpleTextSearch.class.getName()); // mapper job.setMapperClass(TextSearchMapper.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); // combiner + reducer job.setCombinerClass(TextLongCountingReducer.class); job.setReducerClass(TextLongCountingReducer.class); job.setInputFormatClass(WARCInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // paths String commaSeparatedInputFiles = otherArgs[0]; String outputPath = otherArgs[1]; // regex with a phrase to be searched for String regex = otherArgs[2]; job.getConfiguration().set(MAPREDUCE_MAP_REGEX, regex); FileInputFormat.addInputPaths(job, commaSeparatedInputFiles); FileOutputFormat.setOutputPath(job, new Path(outputPath)); return job.waitForCompletion(true) ? 0 : 1; }