List of usage examples for org.apache.hadoop.mapreduce.Job#setJobName
public void setJobName(String name) throws IllegalStateException
From source file:com.bigfishgames.biginsights.upsight.mapreduce.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: AvroWordCount <input path> <output path>"); return -1; }/*from w w w .j a v a 2 s .c o m*/ Job job = new Job(getConf()); job.setJarByClass(MapReduceAvroWordCount.class); job.setJobName("wordcount"); // We call setOutputSchema first so we can override the configuration // parameters it sets // AvroJob.setOutputKeySchema(job, // Pair.getPairSchema(Schema.create(Type.STRING), // Schema.create(Type.NULL))); AvroJob.setOutputKeySchema(job, Event.getClassSchema()); job.setOutputValueClass(NullWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(MyAvroKeyOutputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setSortComparatorClass(Text.Comparator.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); return 0; }
From source file:com.ckelsel.hadoop.MaxTemperature.App.java
License:Open Source License
public static void main(String[] args) { if (args.length != 2) { System.err.println("Usage: MaxTemperature <input path> <output path>"); System.exit(-1);/* w ww.jav a2 s. c o m*/ } System.out.println(args[0]); System.out.println(args[1]); try { Configuration conf = new Configuration(); conf.set("mapred.job.tracker", "localhost:9001"); Job job = Job.getInstance(conf); job.setJarByClass(App.class); job.setJobName("Max temperature"); FileInputFormat.addInputPath(job, new Path(args[0])); // delete output if exists Path outPath = new Path(args[1]); outPath.getFileSystem(conf).delete(outPath, true); FileOutputFormat.setOutputPath(job, outPath); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); System.exit(job.waitForCompletion(true) ? 0 : -1); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:com.cloudera.avro.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: AvroWordCount <input path> <output path>"); return -1; }/*from w ww. j a v a 2 s.c o m*/ Job job = new Job(getConf()); job.setJarByClass(MapReduceAvroWordCount.class); job.setJobName("wordcount"); // We call setOutputSchema first so we can override the configuration // parameters it sets AvroJob.setOutputKeySchema(job, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT))); job.setOutputValueClass(NullWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setSortComparatorClass(Text.Comparator.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); return 0; }
From source file:com.cloudera.avro.MapReduceColorCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: MapReduceColorCount <input path> <output path>"); return -1; }// ww w. j a va 2s.com Job job = new Job(getConf()); job.setJarByClass(MapReduceColorCount.class); job.setJobName("Color Count"); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(AvroKeyInputFormat.class); job.setMapperClass(ColorCountMapper.class); AvroJob.setInputKeySchema(job, User.getClassSchema()); AvroJob.setMapOutputValueSchema(job, User.getClassSchema()); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(ColorCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:com.cloudera.castagna.logparser.mr.StatusCodesStats.java
License:Apache License
/**
 * Runs the status-codes statistics job over text input.
 *
 * <p>Map output compression and removal of an existing output directory are
 * each switched on by a boolean option read from the tool configuration.
 *
 * @param args exactly two entries: the input path and the output path
 * @return -1 on bad arguments, 0 if the job succeeded, 1 otherwise
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    final Configuration conf = getConf();

    // These settings must be applied before the Job is created from conf.
    final boolean compress = conf.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    if (compress) {
        conf.setBoolean("mapred.compress.map.output", true);
        conf.set("mapred.output.compression.type", "BLOCK");
        conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    final boolean overwrite = conf.getBoolean(Constants.OPTION_OVERWRITE_OUTPUT,
            Constants.OPTION_OVERWRITE_OUTPUT_DEFAULT);
    final Path inputPath = new Path(args[0]);
    final Path outputPath = new Path(args[1]);
    final FileSystem outputFs = FileSystem.get(outputPath.toUri(), conf);
    if (overwrite) {
        outputFs.delete(outputPath, true);
    }

    final Job statsJob = Job.getInstance(conf);
    statsJob.setJobName(Constants.STATUS_CODES_STATS);
    statsJob.setJarByClass(getClass());

    FileInputFormat.addInputPath(statsJob, inputPath);
    FileOutputFormat.setOutputPath(statsJob, outputPath);

    statsJob.setInputFormatClass(TextInputFormat.class);

    statsJob.setMapperClass(StatusCodesStatsMapper.class);
    statsJob.setMapOutputKeyClass(Text.class);
    statsJob.setMapOutputValueClass(Text.class);

    statsJob.setCombinerClass(StatusCodesStatsCombiner.class);

    statsJob.setReducerClass(StatusCodesStatsReducer.class);
    statsJob.setOutputKeyClass(Text.class);
    statsJob.setOutputValueClass(Text.class);

    Utils.setReducers(statsJob, conf, log);

    statsJob.setOutputFormatClass(TextOutputFormat.class);

    if (log.isDebugEnabled()) {
        Utils.log(statsJob, log);
    }

    if (statsJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:com.cloudera.castagna.logparser.mr.TranscodeLogs.java
License:Apache License
/**
 * Runs the map-only log transcoding job over text input.
 *
 * <p>An existing output directory is removed first when the overwrite
 * option is enabled in the tool configuration.
 *
 * @param args exactly two entries: the input path and the output path
 * @return -1 on bad arguments, 0 if the job succeeded, 1 otherwise
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    final Configuration conf = getConf();

    final boolean overwrite = conf.getBoolean(Constants.OPTION_OVERWRITE_OUTPUT,
            Constants.OPTION_OVERWRITE_OUTPUT_DEFAULT);
    final Path inputPath = new Path(args[0]);
    final Path outputPath = new Path(args[1]);
    final FileSystem outputFs = FileSystem.get(outputPath.toUri(), conf);
    if (overwrite) {
        outputFs.delete(outputPath, true);
    }

    final Job transcodeJob = Job.getInstance(conf);
    // NOTE(review): the job name reuses Constants.STATUS_CODES_STATS;
    // confirm this is intended for the transcode job.
    transcodeJob.setJobName(Constants.STATUS_CODES_STATS);
    transcodeJob.setJarByClass(getClass());

    FileInputFormat.addInputPath(transcodeJob, inputPath);
    FileOutputFormat.setOutputPath(transcodeJob, outputPath);

    transcodeJob.setInputFormatClass(TextInputFormat.class);

    transcodeJob.setMapperClass(TranscodeLogsMapper.class);
    transcodeJob.setMapOutputKeyClass(Text.class);
    transcodeJob.setMapOutputValueClass(Text.class);

    // No reduce phase.
    transcodeJob.setNumReduceTasks(0);

    transcodeJob.setOutputFormatClass(TextOutputFormat.class);

    if (log.isDebugEnabled()) {
        Utils.log(transcodeJob, log);
    }

    if (transcodeJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:com.cloudera.crunch.impl.mr.plan.JobPrototype.java
License:Open Source License
private CrunchJob build(Class<?> jarClass, Configuration conf) throws IOException { Job job = new Job(conf); conf = job.getConfiguration();//from ww w . jav a 2 s. c o m job.setJarByClass(jarClass); Set<DoNode> outputNodes = Sets.newHashSet(); Set<Target> targets = targetsToNodePaths.keySet(); MSCROutputHandler outputHandler = new MSCROutputHandler(job, workingPath, group == null); for (Target target : targets) { DoNode node = null; for (NodePath nodePath : targetsToNodePaths.get(target)) { if (node == null) { PCollectionImpl collect = nodePath.tail(); node = DoNode.createOutputNode(target.toString(), collect.getPType()); outputHandler.configureNode(node, target); } outputNodes.add(walkPath(nodePath.descendingIterator(), node)); } } job.setMapperClass(CrunchMapper.class); List<DoNode> inputNodes; DoNode reduceNode = null; RTNodeSerializer serializer = new RTNodeSerializer(); if (group != null) { job.setReducerClass(CrunchReducer.class); List<DoNode> reduceNodes = Lists.newArrayList(outputNodes); reduceNode = reduceNodes.get(0); serializer.serialize(reduceNodes, conf, NodeContext.REDUCE); group.configureShuffle(job); DoNode mapOutputNode = group.getGroupingNode(); if (reduceNodes.size() == 1 && combineFnTable != null) { // Handle the combiner case DoNode mapSideCombineNode = combineFnTable.createDoNode(); mapSideCombineNode.addChild(mapOutputNode); mapOutputNode = mapSideCombineNode; } Set<DoNode> mapNodes = Sets.newHashSet(); for (NodePath nodePath : mapNodePaths) { // Advance these one step, since we've already configured // the grouping node, and the PGroupedTableImpl is the tail // of the NodePath. 
Iterator<PCollectionImpl> iter = nodePath.descendingIterator(); iter.next(); mapNodes.add(walkPath(iter, mapOutputNode)); } inputNodes = Lists.newArrayList(mapNodes); serializer.serialize(inputNodes, conf, NodeContext.MAP); } else { // No grouping job.setNumReduceTasks(0); inputNodes = Lists.newArrayList(outputNodes); serializer.serialize(inputNodes, conf, NodeContext.MAP); } if (inputNodes.size() == 1) { DoNode inputNode = inputNodes.get(0); inputNode.getSource().configureSource(job, -1); } else { for (int i = 0; i < inputNodes.size(); i++) { DoNode inputNode = inputNodes.get(i); inputNode.getSource().configureSource(job, i); } job.setInputFormatClass(CrunchInputFormat.class); } job.setJobName(createJobName(inputNodes, reduceNode)); return new CrunchJob(job, workingPath, outputHandler); }
From source file:com.cloudera.recordservice.avro.mapreduce.ColorCount.java
License:Apache License
/** * Run the MR2 color count with generic records, and return a map of favorite colors to * the number of users./*from ww w . j a v a 2s. co m*/ */ public static java.util.Map<String, Integer> countColors() throws IOException, ClassNotFoundException, InterruptedException { String output = TestUtil.getTempDirectory(); Path outputPath = new Path(output); JobConf conf = new JobConf(ColorCount.class); conf.setInt("mapreduce.job.reduces", 1); Job job = Job.getInstance(conf); job.setJarByClass(ColorCount.class); job.setJobName("MR2 Color Count With Generic Records"); RecordServiceConfig.setInputTable(job.getConfiguration(), "rs", "users"); job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); job.setMapperClass(Map.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(Reduce.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); job.waitForCompletion(false); // Read the result and return it. Since we set the number of reducers to 1, // there is always just one file containing the value. SeekableInput input = new FsInput(new Path(output + "/part-r-00000.avro"), conf); DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(); FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader); java.util.Map<String, Integer> colorMap = new HashMap<String, Integer>(); for (GenericRecord datum : fileReader) { colorMap.put(datum.get(0).toString(), Integer.parseInt(datum.get(1).toString())); } return colorMap; }
From source file:com.cloudera.recordservice.examples.mapreduce.MapReduceAgeCount.java
License:Apache License
public int run(String[] args) throws Exception { org.apache.log4j.BasicConfigurator.configure(); if (args.length != 2) { System.err.println("Usage: MapReduceAgeCount <input path> <output path>"); return -1; }/*from w ww. j a v a2s . c o m*/ Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceAgeCount.class); job.setJobName("Age Count"); // RECORDSERVICE: // To read from a table instead of a path, comment out // FileInputFormat.setInputPaths() and instead use: // FileInputFormat.setInputPaths(job, new Path(args[0])); RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]); // RECORDSERVICE: // Use the RecordService version of the AvroKeyValueInputFormat job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyValueInputFormat.class); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(AgeCountMapper.class); // Set schema for input key and value. AvroJob.setInputKeySchema(job, UserKey.getClassSchema()); AvroJob.setInputValueSchema(job, UserValue.getClassSchema()); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(AgeCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:com.cloudera.recordservice.examples.mapreduce.MapReduceColorCount.java
License:Apache License
@Override public int run(String[] args) throws Exception { org.apache.log4j.BasicConfigurator.configure(); if (args.length != 2) { System.err.println("Usage: MapReduceColorCount <input path> <output path>"); return -1; }//from w w w .java 2 s . c om Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceColorCount.class); job.setJobName("Color Count"); // RECORDSERVICE: // To read from a table instead of a path, comment out // FileInputFormat.setInputPaths() and instead use: //FileInputFormat.setInputPaths(job, new Path(args[0])); RecordServiceConfig.setInputTable(job.getConfiguration(), "rs", "users"); // RECORDSERVICE: // Use the RecordService version of the AvroKeyInputFormat job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.class); //job.setInputFormatClass(AvroKeyInputFormat.class); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(ColorCountMapper.class); AvroJob.setInputKeySchema(job, User.getClassSchema()); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(ColorCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }