List of usage examples for org.apache.hadoop.mapred.JobConf#setJobName
public void setJobName(String name)
From source file:com.benchmark.mapred.terasort.TeraValidate.java
License:Apache License
public int run(String[] args) throws Exception { JobConf job = (JobConf) getConf(); TeraInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraValidate"); job.setJarByClass(TeraValidate.class); job.setMapperClass(ValidateMapper.class); job.setReducerClass(ValidateReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // force a single reducer job.setNumReduceTasks(1);/*from w w w. ja v a2s. c om*/ // force a single split job.setLong("mapred.min.split.size", Long.MAX_VALUE); job.setInputFormat(TeraInputFormat.class); JobClient.runJob(job); return 0; }
From source file:com.chriscx.mapred.Driver.java
public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), Driver.class); conf.setJobName("wordcount"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { if ("-skip".equals(args[i])) { DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf); conf.setBoolean("wordcount.skip.patterns", true); } else {//from w ww .j a v a2 s .c o m other_args.add(args[i]); } } FileInputFormat.setInputPaths(conf, new Path(other_args.get(0))); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); JobClient.runJob(conf); return 0; }
From source file:com.chriscx.matching.Driver.java
public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), com.chriscx.mapred.Driver.class); conf.setJobName("Matching"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { if ("-skip".equals(args[i])) { DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf); conf.setBoolean("wordcount.skip.patterns", true); } else {// www . j a va 2 s. c om other_args.add(args[i]); } } FileInputFormat.setInputPaths(conf, new Path(other_args.get(0))); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); JobClient.runJob(conf); return 0; }
From source file:com.cloudera.avro.AvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: AvroWordCount <input path> <output path>"); return -1; }// w ww . ja v a2 s.c o m JobConf conf = new JobConf(AvroWordCount.class); conf.setJobName("wordcount"); // We call setOutputSchema first so we can override the configuration // parameters it sets AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT))); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(IntWritable.class); conf.setOutputKeyComparatorClass(Text.Comparator.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); return 0; }
From source file:com.cloudera.avro.MapredColorCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: MapredColorCount <input path> <output path>"); return -1; }//from w w w . ja v a 2 s .c o m JobConf conf = new JobConf(getConf(), MapredColorCount.class); conf.setJobName("colorcount"); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); AvroJob.setMapperClass(conf, ColorCountMapper.class); AvroJob.setReducerClass(conf, ColorCountReducer.class); // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set // relevant config options such as input/output format, map output // classes, and output key class. AvroJob.setInputSchema(conf, User.getClassSchema()); AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT))); JobClient.runJob(conf); return 0; }
From source file:com.cloudera.hbase.OldWordCount.java
License:Open Source License
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(WordCount.class); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);//from w w w . j av a2s . co m } conf.setJobName("word count"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(conf, new Path(otherArgs[1])); JobClient.runJob(conf); return; }
From source file:com.cloudera.recordservice.avro.mapred.ColorCount.java
License:Apache License
/** * Run the MR1 color count with generic records, and return a map of favorite colors to * the number of users.//from w w w .ja va 2 s . c o m */ public static java.util.Map<String, Integer> countColors() throws IOException { String output = TestUtil.getTempDirectory(); Path outputPath = new Path(output); JobConf conf = new JobConf(ColorCount.class); conf.setJobName("MR1 Color Count With Generic Records"); conf.setInt("mapreduce.job.reduces", 1); conf.setBoolean(com.cloudera.recordservice.avro.AvroJob.USE_RECORD_SERVICE_INPUT_FORMAT_CONF_KEY, true); com.cloudera.recordservice.avro.AvroJob.setInputFormat(conf, org.apache.avro.mapred.AvroInputFormat.class); RecordServiceConfig.setInputTable(conf, "rs", "users"); FileOutputFormat.setOutputPath(conf, outputPath); AvroJob.setMapperClass(conf, Map.class); AvroJob.setReducerClass(conf, Reduce.class); AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT))); JobClient.runJob(conf); // Read the result and return it. Since we set the number of reducers to 1, // there is always just one file containing the value. SeekableInput input = new FsInput(new Path(output + "/part-00000.avro"), conf); DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(); FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader); java.util.Map<String, Integer> colorMap = new HashMap<String, Integer>(); for (GenericRecord datum : fileReader) { colorMap.put(datum.get(0).toString(), Integer.parseInt(datum.get(1).toString())); } return colorMap; }
From source file:com.cloudera.recordservice.examples.mapreduce.MapredColorCount.java
License:Apache License
@Override public int run(String[] args) throws Exception { org.apache.log4j.BasicConfigurator.configure(); if (args.length != 2) { System.err.println("Usage: MapredColorCount <input path> <output path>"); return -1; }//from ww w. j a v a 2 s .com JobConf conf = new JobConf(getConf(), MapredColorCount.class); conf.setJobName("colorcount With Generic Records"); // RECORDSERVICE: // By using the recordservice AvroJob utility, we can configure at run time to // switch between using the recordservice or not. // In this example, we'll set the conf to true to enable the RecordService.. conf.setBoolean(com.cloudera.recordservice.avro.AvroJob.USE_RECORD_SERVICE_INPUT_FORMAT_CONF_KEY, true); com.cloudera.recordservice.avro.AvroJob.setInputFormat(conf, org.apache.avro.mapred.AvroInputFormat.class); // RECORDSERVICE: // To read from a table instead of a path, comment out setInputPaths and instead use: RecordServiceConfig.setInputTable(conf, "rs", "users"); //FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); AvroJob.setMapperClass(conf, ColorCountMapper.class); AvroJob.setReducerClass(conf, ColorCountReducer.class); // Note that AvroJob.setOutputSchema set relevant config options such as output // format, map output classes, and output key class. // Do not need to setInputSchema when using Generic Records. AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT))); JobClient.runJob(conf); return 0; }
From source file:com.cloudera.recordservice.examples.mapreduce.WordCount.java
License:Apache License
public void run(String[] args) throws Exception { boolean useRecordService = true; if (args.length == 3) { useRecordService = Boolean.parseBoolean(args[2]); } else if (args.length != 2) { System.err.println("Usage: WordCount <input path> <output path>"); System.exit(-1);//from w w w .j a va2 s. co m } String input = args[0].trim(); String output = args[1]; JobConf conf = new JobConf(WordCount.class); conf.setJobName("wordcount-" + (useRecordService ? "with" : "without") + "-RecordService"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); if (useRecordService) { conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class); RecordServiceConfig.setInput(conf, input); } else { conf.setInputFormat(TextInputFormat.class); FileInputFormat.setInputPaths(conf, new Path(input)); } FileSystem fs = FileSystem.get(conf); Path outputPath = new Path(output); if (fs.exists(outputPath)) fs.delete(outputPath, true); conf.setOutputFormat(TextOutputFormat.class); FileOutputFormat.setOutputPath(conf, outputPath); JobClient.runJob(conf); System.out.println("Done"); }
From source file:com.cloudera.recordservice.mapreduce.testapps.RecordCount.java
License:Apache License
public static long countRecords(String path) throws IOException { String output = TestUtil.getTempDirectory(); Path inputPath = new Path(path); Path outputPath = new Path(output); JobConf conf = new JobConf(RecordCount.class); conf.setJobName("recordcount"); conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(LongWritable.class); conf.setInt("mapreduce.job.reduces", 1); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); JobClient.runJob(conf);//from w w w. j a v a2 s . com // Read the result and return it. Since we set the number of reducers to 1, // there is always just one file containing the value. FileSystem fs = outputPath.getFileSystem(conf); FSDataInputStream resultStream = fs.open(new Path(output + "/part-00000")); byte[] bytes = new byte[16]; int length = resultStream.read(bytes); String result = new String(bytes, 0, length).trim(); return Long.parseLong(result); }