List of usage examples for org.apache.hadoop.mapreduce Job setMapperClass
public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException
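Before the examples, a minimal sketch of the typical call pattern, assuming a hypothetical MyDriver/MyMapper pair (these names do not come from any of the sources below). The method throws IllegalStateException if the job has already been submitted, so the mapper must be set while the job is still being configured:

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "example"); // non-deprecated factory for Job
job.setJarByClass(MyDriver.class);          // jar that contains the mapper
job.setMapperClass(MyMapper.class);         // legal only before submission;
                                            // afterwards it throws IllegalStateException
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);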
From source file:com.antbrains.crf.hadoop.FeatureFilter.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 4) {
        System.err.println("Usage: FeatureFilter <in> <out> <filterRuleFile> <statOnly>");
        System.exit(-1);
    }
    boolean statOnly = true;
    if (otherArgs[3].equalsIgnoreCase("false")) {
        statOnly = false;
    }
    conf.set("statOnly", statOnly + "");
    String rules = FileTools.readFile(otherArgs[2], "UTF8");
    conf.set("rules", rules);
    conf.set("mapred.reduce.tasks", "0"); // map-only job
    Job job = new Job(conf, FeatureFilter.class.getSimpleName());
    job.setJarByClass(FeatureFilter.class);
    job.setMapperClass(CounterMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
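The example above disables the reduce phase through the legacy mapred.reduce.tasks key. With the org.apache.hadoop.mapreduce API, the same map-only behavior can be requested on the Job itself; an equivalent one-line sketch:

job.setNumReduceTasks(0); // map-only: mapper output goes straight to the output format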
From source file:com.antbrains.crf.hadoop.FeatureStat.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: FeatureStat <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, FeatureStat.class.getSimpleName());
    job.setJarByClass(FeatureStat.class);
    job.setMapperClass(CounterMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.antbrains.crf.hadoop.InstanceGenerator.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 4) {
        System.err.println("InstanceGenerator <in> <out> <featuredict> <template>");
        System.exit(-1);
    }
    Template template = new Template(otherArgs[3], "UTF8");
    conf.set("template", object2String(template));
    // conf.set("tc", object2String(tc));
    DistributedCache.addCacheFile(new URI(otherArgs[2]), conf);
    conf.set("dict", otherArgs[2]);
    conf.set("mapred.reduce.tasks", "0"); // map-only job
    Job job = new Job(conf, InstanceGenerator.class.getSimpleName());
    job.setJarByClass(InstanceGenerator.class);
    job.setMapperClass(CounterMapper.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.antbrains.crf.hadoop.ParallelTraining.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 4) {
        System.err.println("ParallelTraining <instanceDir> <outDir> <featurecount> <training-params>");
        System.exit(-1);
    }
    int featureCount = Integer.valueOf(otherArgs[2]);
    // conf.set("tc", object2String(tc));
    conf.set("pt.iterate", "1");
    conf.set("pt.featureCount", featureCount + "");
    TrainingParams params = SgdCrf.loadParams(otherArgs[3]);
    System.out.println(new Gson().toJson(params));
    conf.set("pt.params", object2String(params));
    Job job = new Job(conf, ParallelTraining.class.getSimpleName());
    job.setJarByClass(ParallelTraining.class);
    job.setMapperClass(TrainingMapper.class);
    job.setReducerClass(TrainingReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(TrainingWeights.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.antbrains.crf.hadoop.ParallelTraining2.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Check the argument count before otherArgs[3] and otherArgs[4] are read.
    if (otherArgs.length != 5) {
        System.err.println("ParallelTraining2 <instanceDir> <outDir> <featurecount> <training-params> <out-iter>");
        System.exit(-1);
    }
    FileSystem fs = FileSystem.get(conf);
    TrainingParams params = SgdCrf.loadParams(otherArgs[3]);
    System.out.println(new Gson().toJson(params));
    int featureCount = Integer.valueOf(otherArgs[2]);
    // conf.set("tc", object2String(tc));
    int outIter = Integer.valueOf(otherArgs[4]);
    String prevOutDir = "";
    for (int i = 1; i <= outIter; i++) {
        System.out.println("iterator: " + i);
        conf.set("pt.iterate", i + "");
        conf.set("pt.featureCount", featureCount + "");
        conf.set("pt.params", object2String(params));
        String outDir = otherArgs[1] + "/result" + i;
        if (i > 1) {
            // Later iterations read the weights produced by the previous round.
            conf.set("paramDir", prevOutDir);
        }
        prevOutDir = outDir;
        fs.delete(new Path(outDir), true);
        Job job = new Job(conf, ParallelTraining2.class.getSimpleName());
        job.setJarByClass(ParallelTraining2.class);
        job.setMapperClass(TrainingMapper.class);
        job.setReducerClass(TrainingReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        System.out.println("outDir: " + outDir);
        FileOutputFormat.setOutputPath(job, new Path(outDir));
        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("iter " + i + " failed");
            break;
        }
    }
}
From source file:com.antbrains.crf.hadoop.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.app.hadoopexample.MaxTemperatureDriver.java
public int run(String[] arg) throws Exception {
    // Note: the passed-in arguments are ignored; paths are hard-coded here.
    String[] args = { "C:/Hadoop/input/LICENSE.txt", "C:/Hadoop/output/LICENSE.txt" };
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperatureDriver <input path> <output path>");
        System.exit(-1);
    }
    Job job = new Job();
    job.setJarByClass(MaxTemperatureDriver.class);
    job.setJobName("Max Temperature");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // A job can only be submitted once, so call waitForCompletion a single
    // time and return its status rather than exiting here.
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
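The run(String[]) signature suggests this driver implements the Tool interface; such drivers are conventionally launched through ToolRunner so that GenericOptionsParser can pick up -D options. A minimal sketch under that assumption (this main method is not part of the quoted source):

public static void main(String[] args) throws Exception {
    // Assumes MaxTemperatureDriver implements org.apache.hadoop.util.Tool.
    int exitCode = ToolRunner.run(new Configuration(), new MaxTemperatureDriver(), args);
    System.exit(exitCode);
}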
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java
License:Apache License
/**
 * Test for a map-only job.
 * @throws Exception if failed
 */
@Test
public void map_only() throws Exception {
    Job job = newJob();
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(SimpleMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, "Hello, world!");
    File outputDir = folder.newFolder();
    outputDir.delete();
    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    assertThat(new SimpleJobRunner().run(job), is(true));
    assertThat(trimHead(read(outputDir)), is(set("Hello, world!")));
}
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java
License:Apache License
/**
 * Test for a map-reduce job.
 * @throws Exception if failed
 */
@Test
public void map_reduce() throws Exception {
    Job job = newJob();
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setSortComparatorClass(Text.Comparator.class);
    job.setGroupingComparatorClass(Text.Comparator.class);
    job.setReducerClass(WordCountReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, new String[] { "a b c d", "a a b c", "c", });
    File outputDir = folder.newFolder();
    outputDir.delete();
    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    assertThat(new SimpleJobRunner().run(job), is(true));
    assertThat(toMap(read(outputDir)), is(map(new String[] { "a", "3", "b", "2", "c", "3", "d", "1", })));
}
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java
License:Apache License
/**
 * Test for a job whose mapper throws an exception.
 * @throws Exception if failed
 */
@Test
public void exception() throws Exception {
    Job job = newJob();
    job.setJobName("w/ exception");
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(InvalidMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, "testing");
    File outputDir = folder.newFolder();
    outputDir.delete();
    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    assertThat(new SimpleJobRunner().run(job), is(false));
}