List of usage examples for org.apache.hadoop.mapreduce Job setMapperClass
public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException
From source file:com.hortonworks.mapreduce.URLCount.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = this.getConf(); conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " "); Job job = Job.getInstance(conf, "URLCount"); job.setJarByClass(getClass());//from w w w. j a v a 2 s.c o m job.setInputFormatClass(KeyValueTextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(URLCountM.class); job.setReducerClass(URLCountR.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return (job.waitForCompletion(true) == true ? 0 : -1); }
From source file:com.hortonworks.pso.data.generator.mapreduce.DataGenTool.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = Job.getInstance(getConf()); // new Job(conf, this.getClass().getCanonicalName()); // Configuration conf = getConf(); int mappers = 2; String output = null;//from w w w . j a va2 s .co m String config = null; long count = 100; List<String> otherArgs = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-mappers".equals(args[i])) { mappers = Integer.parseInt(args[++i]); otherArgs.add("-Dmapreduce.job.maps=" + Integer.toString(mappers)); } else if ("-output".equals(args[i])) { output = args[++i]; } else if ("-json.cfg".equals(args[i])) { config = args[++i]; } else if ("-count".equals(args[i])) { count = Long.parseLong(args[++i]); } else { otherArgs.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); // exits } } job.getConfiguration().set("json.cfg", config); String[] altArgs = new String[otherArgs.size()]; otherArgs.toArray(altArgs); GenericOptionsParser gop = new GenericOptionsParser(job.getConfiguration(), altArgs); DataGenInputFormat.setNumberOfRows(job, count); job.setJarByClass(DataGenTool.class); Path output_path = new Path(output); if (output_path.getFileSystem(getConf()).exists(output_path)) { throw new IOException("Output directory " + output_path + " already exists."); } FileOutputFormat.setOutputPath(job, output_path); job.setMapperClass(DataGenMapper.class); // Map Only Job job.setNumReduceTasks(0); // job.setReducerClass(RerateReducer.class); job.setInputFormatClass(DataGenInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(Text.class); // job.setOutputKeyClass(Text.class); // job.setOutputValueClass(Text.class); return 
job.waitForCompletion(true) ? 0 : 1; }
From source file:com.howbuy.hadoop.mr.online.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: secondarysrot <in> <out>"); System.exit(2);/*from w w w . jav a2 s . co m*/ } Job job = new Job(conf, "secondary sort"); job.setJarByClass(SecondarySort.class); job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); // group and partition by the first int in the pair job.setPartitionerClass(FirstPartitioner.class); job.setGroupingComparatorClass(FirstGroupingComparator.class); // the map output is IntPair, IntWritable job.setMapOutputKeyClass(IntPair.class); job.setMapOutputValueClass(IntWritable.class); // the reduce output is Text, IntWritable job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setInputFormatClass(TextInputFormat.class); // job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setNumReduceTasks(3); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.huihui.mr.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { /*HadoopJava.util.Properties??Apache Jakarta Commons Configuration?? * ????API?org.apache.hadoop.conf.Configuration??? *//*ww w. ja v a2 s . c o m*/ Configuration conf = new Configuration(); /* * ?HadoopGenericOptionsParser ??? -D mapreduce.job.queuename ??getRemainingArgs()? ?"xrli/STJoin_in","xrli/STJoin_out"?otherArgs ? fs jt libjars files archives D tokenCacheFile */ String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2); } conf.set("fs.defaultFS", "hdfs://localhost:9000"); // Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); //??? job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); //Path ???URI?Path???Path String input = "hdfs://localhost:9000/input/"; String output = "hdfs://localhost:9000/user/hdfs/log_kpi/browser1"; FileInputFormat.addInputPath(job, new Path(input)); FileOutputFormat.setOutputPath(job, new Path(output)); //???? System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.hzy.test.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { // String input = "hdfs://192.168.1.118:9000/user/hdfs/log_kpi/"; // String output = "hdfs://192.168.1.118:9000/user/hdfs/log_kpi/wc/"; String input = "/tmp/data.txt"; // String input = args[0]; String output = "/tmp/t1"; // String output = args[1]; Configuration conf = HdfsDAO.config(); // conf.set("mapreduce.framework.name", "yarn"); //// conf.set("hbase.zookeeper.quorum", "hadoop01:2181"); // conf.set("fs.default.name", "hdfs://hadoop01:9000"); // conf.set("yarn.resourcemanager.resource-tracker.address", "hadoop01:8031"); // conf.set("yarn.resourcemanager.address", "hadoop01:8032"); // conf.set("yarn.resourcemanager.scheduler.address", "hadoop01:8030"); // conf.set("yarn.resourcemanager.admin.address", "hadoop01:8033"); // conf.set("mapreduce.jobhistory.address", "hadoop01:10020"); // conf.set("mapreduce.jobhistory.webapp.address", "hadoop01:19888"); // String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); // if (otherArgs.length < 2) { // System.err.println("Usage: wordcount <in> [<in>...] <out>"); // System.exit(2); // }//from www . j av a2 s . c o m Job job = Job.getInstance(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(input)); // } FileOutputFormat.setOutputPath(job, new Path(output)); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.ifeng.hadoop.thinker.LogDriver.java
License:Apache License
/**
 * Runs the log-processing job after clearing any previous output at the output location.
 *
 * <p>{@code LogReducer} serves as both combiner and reducer; the output is (Text, Text).
 *
 * @param args exactly two entries: the input path and the output path
 * @return -1 on a usage error, 0 if the job succeeds, 1 if it fails
 * @throws Exception if the job cannot be set up or submitted
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }
    final String inputLocation = args[0];
    final String outputLocation = args[1];

    Job job = Job.getInstance(super.getConf());
    job.setJarByClass(getClass());

    // NOTE(review): the delete goes through java.io.File, so it presumably only affects
    // a local path; confirm that is intended if the output can live on another file system.
    FileUtil.fullyDelete(new File(outputLocation));

    FileInputFormat.addInputPath(job, new Path(inputLocation));
    FileOutputFormat.setOutputPath(job, new Path(outputLocation));

    job.setMapperClass(LogMapper.class);
    job.setReducerClass(LogReducer.class);
    job.setCombinerClass(LogReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:com.ifeng.hadoop.thinker.LogLocalDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName()); ToolRunner.printGenericCommandUsage(System.err); return -1; }//from w w w. j av a 2 s . co m Job job = Job.getInstance(super.getConf()); job.setJarByClass(getClass()); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(LogMapper.class); job.setReducerClass(LogReducer.class); job.setCombinerClass(LogReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.ifeng.ipserver.IPServerLogParseDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = Job.getInstance(super.getConf()); job.setJarByClass(getClass());// w w w. j ava2 s . c o m FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(IPServerLogParseMapper.class); job.setReducerClass(IPServerLogParseReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.ifeng.logparser.NginxLogDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName()); ToolRunner.printGenericCommandUsage(System.err); return -1; }/*from ww w . ja v a2 s . c om*/ Job job = Job.getInstance(super.getConf()); FileInputFormat.setInputDirRecursive(job, true); //FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); FileInputFormat.addInputPaths(job, args[0]); job.setMapperClass(NginxLogMapper.class); job.setReducerClass(NginxLogReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.ifeng.logparser.NginxLogDriver2.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName()); ToolRunner.printGenericCommandUsage(System.err); return -1; }//from w w w.j a va 2 s . c o m log.info("Input: {} , Outpu: {}", args[0], args[1]); Job job = Job.getInstance(super.getConf()); //FileInputFormat.setInputDirRecursive(job, true); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); //FileInputFormat.addInputPaths(job, args[0]); job.setMapperClass(NginxLogMapper.class); job.setReducerClass(NginxLogReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); return job.waitForCompletion(true) ? 0 : 1; }