List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputKeyClass
public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException
From source file:Assignment5_P4_BinningPattern.Binning_IPAddress_By_DayDriver.java
/** * @param args the command line arguments *//*from ww w. j ava 2 s .co m*/ public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "IP Address By Hour"); job.setJarByClass(Binning_IPAddress_By_DayDriver.class); job.setMapperClass(Binning_IPAddress_By_Day_Mapper.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(Text.class); MultipleOutputs.addNamedOutput(job, "textualBins", TextOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.addNamedOutput(job, "massaBins", TextOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.setCountersEnabled(job, true); // set num of reduce tasks to 0 job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:at.illecker.hama.rootbeer.examples.util.RandomGraphGenerator.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 4) { System.out.println(// w w w.j a v a 2 s.c o m "USAGE: <Number of vertices> <Number of edges per vertex> <Number of partitions> <Outpath>"); return; } System.out.println(Arrays.toString(args)); Configuration conf = new Configuration(); conf.setInt("hama.num.vertices", Integer.parseInt(args[0])); conf.setInt("hama.num.partitions", Integer.parseInt(args[2])); conf.setInt("number.edges", Integer.parseInt(args[1])); Job job = new Job(conf); Path generated = new Path(new Path(args[3]).getParent(), "generated"); FileOutputFormat.setOutputPath(job, generated); FileSystem.get(conf).delete(generated, true); job.setJobName("RangeWriter"); job.setJarByClass(SortGenMapper.class); job.setMapperClass(SortGenMapper.class); job.setNumReduceTasks(0); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); job.setInputFormatClass(RangeInputFormat.class); job.waitForCompletion(true); conf.setInt("max.id", Integer.valueOf(args[0])); job = new Job(conf); FileOutputFormat.setOutputPath(job, new Path(args[3])); FileSystem.get(conf).delete(new Path(args[3]), true); job.setJobName("Random Vertex Writer"); FileInputFormat.addInputPath(job, generated); job.setJarByClass(RandomMapper.class); job.setMapperClass(RandomMapper.class); job.setReducerClass(Reducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setNumReduceTasks(conf.getInt("hama.num.partitions", 2)); job.setPartitionerClass(HashPartitioner.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.waitForCompletion(true); }
From source file:AverageProj.AveragePrice.java
public int run(String[] args) throws Exception, ClassNotFoundException { Configuration conf = getConf(); Job job = new Job(conf, "Avg"); job.setJarByClass(AveragePrice.class); //final File f = new File(AveragePrice.class.getProtectionDomain().getCodeSource().getLocation().getPath()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setMapperClass(AvMapper.class); job.setMapOutputKeyClass(YearPrice.class); job.setMapOutputValueClass(DoubleWritable.class); job.setGroupingComparatorClass(YearSymComparator.class); // job.setCombinerClass(AvReducer.class); job.setReducerClass(AvReducer.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(YearPrice.class); job.setOutputValueClass(DoubleWritable.class); job.setPartitionerClass(AvgPartitioner.class); job.setNumReduceTasks(7);// w w w .j a v a 2 s . co m System.exit(job.waitForCompletion(true) ? 0 : 1); return 0; }
From source file:averagerating_youtube.AverageRating_Youtube.java
/** * @param args the command line arguments *///from w w w . ja v a 2 s .co m @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); job.setJobName("AverageRating_Youtube"); job.setJarByClass(AverageRating_Youtube.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(AvgRating_CommCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(AverageRating_CommentCountTuple.class); job.setCombinerClass(AvgRating_CommCountCombiner.class); job.setReducerClass(AvgRating_CommCountReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(AverageRating_CommentCountTuple.class); boolean success = job.waitForCompletion(true); return success ? 0 : 1; }
From source file:avro.mr.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: AvroWordCount <input path> <output path>"); return -1; }/*from ww w .ja v a 2s.com*/ Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceAvroWordCount.class); job.setJobName("wordcount"); // We call setOutputSchema first so we can override the configuration // parameters it sets AvroJob.setOutputKeySchema(job, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT))); job.setOutputValueClass(NullWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setSortComparatorClass(Text.Comparator.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); return 0; }
From source file:avro.mr.MapReduceColorCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: MapReduceColorCount <input path> <output path>"); return -1; }/*from w ww . j a v a 2 s . c om*/ Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceColorCount.class); job.setJobName("Color Count"); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(AvroKeyInputFormat.class); job.setMapperClass(ColorCountMapper.class); AvroJob.setInputKeySchema(job, User.getClassSchema()); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(ColorCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:avro.mr.MapReduceColorCountSeqFile.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: MapReduceColorCount <input path> <output path>"); return -1; }/*ww w .j a v a 2s. c o m*/ Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceColorCountSeqFile.class); job.setJobName("Color Count 2"); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(AvroSequenceFileInputFormat.class); AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.LONG)); AvroJob.setInputValueSchema(job, User.getClassSchema()); job.setMapperClass(ColorCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroSequenceFileOutputFormat.class); job.setReducerClass(ColorCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:azkaban.jobtype.examples.java.WordCount2.java
License:Apache License
@Override public void run() throws Exception { logger.info(String.format("Starting %s", getClass().getSimpleName())); //getConf() and set Job job = createJob(MapperClass.class, ReducerClass.class); job.setJarByClass(WordCount2.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(inputPath)); super.run();//from w ww . j av a2 s. c o m }
From source file:basic.PartitionGraph.java
License:Apache License
/** * Runs this tool./* w w w . j av a 2s . c o m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(RANGE, "use range partitioner")); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption( OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions") .create(NUM_PARTITIONS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES) || !cmdline.hasOption(NUM_PARTITIONS)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inPath = cmdline.getOptionValue(INPUT); String outPath = cmdline.getOptionValue(OUTPUT); int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES)); int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS)); boolean useRange = cmdline.hasOption(RANGE); LOG.info("Tool name: " + PartitionGraph.class.getSimpleName()); LOG.info(" - input dir: " + inPath); LOG.info(" - output dir: " + outPath); LOG.info(" - num partitions: " + numParts); LOG.info(" - node cnt: " + nodeCount); LOG.info(" - use range partitioner: " + useRange); Configuration conf = getConf(); conf.setInt("NodeCount", nodeCount); Job job = Job.getInstance(conf); job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath); job.setJarByClass(PartitionGraph.class); job.setNumReduceTasks(numParts); FileInputFormat.setInputPaths(job, new Path(inPath)); FileOutputFormat.setOutputPath(job, new Path(outPath)); job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNode.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNode.class); if (useRange) { job.setPartitionerClass(RangePartitioner.class); } FileSystem.get(conf).delete(new Path(outPath), true); job.waitForCompletion(true); return 0; }
From source file:be.uantwerpen.adrem.hadoop.util.Tools.java
License:Apache License
@SuppressWarnings("rawtypes") public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue, Class<? extends OutputFormat> outputFormat) throws IOException { Job job = new Job(new Configuration()); Configuration jobConf = job.getConfiguration(); if (reducer.equals(Reducer.class)) { if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); }//from ww w.j a v a 2 s. co m job.setJarByClass(mapper); } else { job.setJarByClass(reducer); } job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); if (mapperKey != null) { job.setMapOutputKeyClass(mapperKey); } if (mapperValue != null) { job.setMapOutputValueClass(mapperValue); } jobConf.setBoolean("mapred.compress.map.output", true); job.setReducerClass(reducer); job.setOutputKeyClass(reducerKey); job.setOutputValueClass(reducerValue); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }