List of usage examples for org.apache.hadoop.mapreduce.lib.input FileInputFormat setInputPaths
public static void setInputPaths(Job job, Path... inputPaths) throws IOException
From source file:com.avira.couchdoop.jobs.CouchbaseExporter.java
License:Apache License
public Job configureJob(Configuration conf, String input) throws IOException { conf.setInt("mapreduce.map.failures.maxpercent", 5); conf.setInt("mapred.max.map.failures.percent", 5); conf.setInt("mapred.max.tracker.failures", 20); Job job = Job.getInstance(conf);// w w w .j a v a 2 s . c o m job.setJarByClass(CouchbaseExporter.class); // Input FileInputFormat.setInputPaths(job, input); // Mapper job.setMapperClass(CsvToCouchbaseMapper.class); job.setMapOutputKeyClass(String.class); job.setMapOutputValueClass(CouchbaseAction.class); // Reducer job.setNumReduceTasks(0); // Output job.setOutputFormatClass(CouchbaseOutputFormat.class); job.setOutputKeyClass(String.class); job.setOutputValueClass(CouchbaseAction.class); return job; }
From source file:com.awcoleman.ExampleJobSummaryLogWithOutput.BinRecToAvroRecDriver.java
License:Apache License
public int run(String[] args) throws Exception { String input = null;/*ww w . jav a2 s . c om*/ String output = null; if (args.length < 2) { System.err.printf("Usage: %s <input> <output>\n", this.getClass().getSimpleName()); return -1; } else { input = args[0]; output = args[1]; } Job job = Job.getInstance(getConf(), "BinRecToAvroRecDriver"); Configuration conf = job.getConfiguration(); //Add job log to hold Driver logging (and any summary info about the dataset,job, or counters we want to write) String fapath = createTempFileAppender(job); //get schema Schema outSchema = ReflectData.get().getSchema(com.awcoleman.examples.avro.BinRecForPartitions.class); job.getConfiguration().set("outSchema", outSchema.toString()); //Job conf settings job.setJarByClass(BinRecToAvroRecDriver.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(BinRecInputFormat.class); job.setOutputFormatClass(AvroKeyOutputFormat.class); AvroJob.setOutputKeySchema(job, outSchema); AvroJob.setMapOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setMapOutputValueSchema(job, outSchema); //Job output compression FileOutputFormat.setCompressOutput(job, true); job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.DEFLATE_CODEC); //Input and Output Paths FileInputFormat.setInputPaths(job, new Path(input)); Path outPath = new Path(output); FileOutputFormat.setOutputPath(job, outPath); outPath.getFileSystem(conf).delete(outPath, true); boolean jobCompletionStatus = job.waitForCompletion(true); //Print Custom Counters before exiting Counters counters = job.getCounters(); for (MYJOB_CNTRS customCounter : MYJOB_CNTRS.values()) { Counter thisCounter = counters.findCounter(customCounter); System.out.println("Custom Counter " + customCounter + "=" + thisCounter.getValue()); } long mycnt1 = job.getCounters() .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS", "MYCNT1") .getValue(); long mycnt2 = job.getCounters() .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS", "MYCNT2") .getValue(); long mycnt3 = job.getCounters() .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS", "MYCNT3") .getValue(); long myfakekpi = mycnt1 - mycnt2; String msgMyfakekpi = "The Fake KPI of the Dataset: " + String.format("%,d", myfakekpi); System.out.println(msgMyfakekpi); logger.info(msgMyfakekpi); //Finished, so move job log to HDFS in _log dir, clean copyTempFileAppenderToHDFSOutpath(job, fapath, output); return jobCompletionStatus ? 0 : 1; }
From source file:com.baidu.cloud.bmr.mapreduce.AccessLogAnalyzer.java
License:Open Source License
public static void main(String[] args) { Configuration conf = new Configuration(); if (args.length != 2) { System.err.println("Usage: AccessLogAnalyzer <input path> <output path>"); System.exit(-1);//from www .ja va 2 s . c o m } String inputPath = args[0]; String outputPath = args[1]; try { Job job = new Job(conf, "AccessLogAnalyzer"); job.setJarByClass(AccessLogAnalyzer.class); job.setMapperClass(AccessLogAnalyzerMapper.class); job.setReducerClass(AccessLogAnalyzerReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.setInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, new Path(outputPath)); System.exit(job.waitForCompletion(true) ? 0 : 1); } catch (IOException | ClassNotFoundException | InterruptedException e) { } }
From source file:com.bigdog.hadoop.mapreduce.combine.WordCountCombineApp.java
public void combine() throws Exception { Configuration conf = new Configuration(); final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf); final Path outPath = new Path(OUT_PATH); if (fileSystem.exists(outPath)) { fileSystem.delete(outPath, true); }//from w w w .ja v a2s .c o m final Job job = new Job(conf, WordCountCombineApp.class.getSimpleName()); //1.1?? FileInputFormat.setInputPaths(job, INPUT_PATH); //???? //job.setInputFormatClass(TextInputFormat.class); //1.2 map job.setMapperClass(MyMapper.class); //map<k,v><k3,v3><k2,v2>?? //job.setMapOutputKeyClass(Text.class); //job.setMapOutputValueClass(LongWritable.class); //1.3 //job.setPartitionerClass(HashPartitioner.class); //reduce? //job.setNumReduceTasks(1); //1.4 TODO ?? //1.5 job.setCombinerClass(MyCombiner.class); //2.2 reduce job.setReducerClass(MyReducer.class); //reduce job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); //2.3 FileOutputFormat.setOutputPath(job, outPath); //? //job.setOutputFormatClass(TextOutputFormat.class); //job??JobTracker? job.waitForCompletion(true); }
From source file:com.bigdog.hadoop.mapreduce.counter.WordCountCounterApp.java
public void CustomerCounter() throws Exception { Configuration conf = new Configuration(); final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf); final Path outPath = new Path(OUT_PATH); if (fileSystem.exists(outPath)) { fileSystem.delete(outPath, true); }/*from w ww. j av a2 s . co m*/ final Job job = new Job(conf, WordCountCounterApp.class.getSimpleName()); //1.1?? FileInputFormat.setInputPaths(job, INPUT_PATH); //???? //job.setInputFormatClass(TextInputFormat.class); //1.2 map job.setMapperClass(MyMapper.class); //map<k,v><k3,v3><k2,v2>?? //job.setMapOutputKeyClass(Text.class); //job.setMapOutputValueClass(LongWritable.class); //1.3 //job.setPartitionerClass(HashPartitioner.class); //reduce? //job.setNumReduceTasks(1); //1.4 TODO ?? //1.5 TODO //2.2 reduce job.setReducerClass(MyReducer.class); //reduce job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); //2.3 FileOutputFormat.setOutputPath(job, outPath); //? //job.setOutputFormatClass(TextOutputFormat.class); //job??JobTracker? job.waitForCompletion(true); }
From source file:com.bigdog.hadoop.mapreduce.group.GroupApp.java
public void group() throws Exception { final Configuration configuration = new Configuration(); final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), configuration); if (fileSystem.exists(new Path(OUT_PATH))) { fileSystem.delete(new Path(OUT_PATH), true); }// w w w . j a va 2s . c o m final Job job = new Job(configuration, GroupApp.class.getSimpleName()); //1.1 FileInputFormat.setInputPaths(job, INPUT_PATH); //?? job.setInputFormatClass(TextInputFormat.class); //1.2Mapper job.setMapperClass(MyMapper.class); //<k2,v2> job.setMapOutputKeyClass(NewK2.class); job.setMapOutputValueClass(LongWritable.class); //1.3 job.setPartitionerClass(HashPartitioner.class); job.setNumReduceTasks(1); //1.4 TODO ?? job.setGroupingComparatorClass(MyGroupingComparator.class); //1.5 TODO ?? //2.2 reduce job.setReducerClass(MyReducer.class); //<k3,v3> job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(LongWritable.class); //2.3 FileOutputFormat.setOutputPath(job, new Path(OUT_PATH)); //? job.setOutputFormatClass(TextOutputFormat.class); //???JobTracker job.waitForCompletion(true); }
From source file:com.bigdog.hadoop.mapreduce.partition.KpiApp.java
public void kpi() throws Exception { final Job job = new Job(new Configuration(), KpiApp.class.getSimpleName()); job.setJarByClass(KpiApp.class); //1.1 // w ww . j a va 2 s. co m FileInputFormat.setInputPaths(job, INPUT_PATH); //?? job.setInputFormatClass(TextInputFormat.class); //1.2Mapper job.setMapperClass(MyMapper.class); //<k2,v2> job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(KpiWritable.class); //1.3 job.setPartitionerClass(KpiPartitioner.class); job.setNumReduceTasks(2); //1.4 TODO ?? //1.5 TODO ?? //2.2 reduce job.setReducerClass(MyReducer.class); //<k3,v3> job.setOutputKeyClass(Text.class); job.setOutputValueClass(KpiWritable.class); //2.3 FileOutputFormat.setOutputPath(job, new Path(OUT_PATH)); //? job.setOutputFormatClass(TextOutputFormat.class); //???JobTracker job.waitForCompletion(true); }
From source file:com.bigfishgames.biginsights.upsight.mapreduce.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: AvroWordCount <input path> <output path>"); return -1; }// www . ja v a 2s . com Job job = new Job(getConf()); job.setJarByClass(MapReduceAvroWordCount.class); job.setJobName("wordcount"); // We call setOutputSchema first so we can override the configuration // parameters it sets // AvroJob.setOutputKeySchema(job, // Pair.getPairSchema(Schema.create(Type.STRING), // Schema.create(Type.NULL))); AvroJob.setOutputKeySchema(job, Event.getClassSchema()); job.setOutputValueClass(NullWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(MyAvroKeyOutputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setSortComparatorClass(Text.Comparator.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); return 0; }
From source file:com.bizosys.oneline.maintenance.Import.java
License:Apache License
/** * Sets up the actual job.//from w w w .j a va2 s. c o m * * @param conf The current configuration. * @param args The command line parameters. * @return The newly created job. * @throws IOException When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException { String tableName = args[0]; Path inputDir = new Path(args[1]); Job job = new Job(conf, NAME + "_" + tableName); job.setJarByClass(Importer.class); System.out.println("Input Dir:" + inputDir); FileInputFormat.setInputPaths(job, inputDir); job.setInputFormatClass(SequenceFileInputFormat.class); job.setMapperClass(Importer.class); // No reducers. Just write straight to table. Call initTableReducerJob // because it sets up the TableOutputFormat. TableMapReduceUtil.initTableReducerJob(tableName, null, job); job.setNumReduceTasks(0); return job; }
From source file:com.cloudera.avro.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: AvroWordCount <input path> <output path>"); return -1; }/*from w ww. j a v a 2 s .c om*/ Job job = new Job(getConf()); job.setJarByClass(MapReduceAvroWordCount.class); job.setJobName("wordcount"); // We call setOutputSchema first so we can override the configuration // parameters it sets AvroJob.setOutputKeySchema(job, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT))); job.setOutputValueClass(NullWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setSortComparatorClass(Text.Comparator.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); return 0; }