List of usage examples for org.apache.hadoop.mapreduce Job setJarByClass
public void setJarByClass(Class<?> cls)
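setJarByClass tells Hadoop which jar to ship to the cluster by locating the jar that contains the given class, typically the driver class itself. Below is a minimal sketch of the usual driver pattern; the class names WordCount, MyMapper, and MyReducer are illustrative placeholders, not taken from the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count");
        // Hadoop finds the jar containing WordCount and distributes it to the tasks
        job.setJarByClass(WordCount.class);
        // MyMapper and MyReducer are assumed to be defined elsewhere
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}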
From source file:com.ema.hadoop.wordcount.WordCount_cache.java
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }

    // First we write the stop word list;
    // it could also be a file manually loaded into HDFS
    String[] stopwords = { "the", "a" };
    Configuration configuration = new Configuration();
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);
    Path file = new Path("hdfs://localhost:9000/user/student/stop_words.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file, new Progressable() {
        @Override
        public void progress() {
            System.out.println("...bytes written");
        }
    });
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    for (String w : stopwords) {
        br.write(w + "\n");
    }
    br.close();
    hdfs.close();

    Job job = Job.getInstance();
    job.addCacheFile(new Path("hdfs://localhost:9000/user/student/stop_words.txt").toUri());
    job.setJarByClass(WordCount_cache.class);
    job.setJobName("Word count job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(WCMapper_cache.class);
    job.setReducerClass(WCReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.ery.hadoop.mrddx.client.MRJOBClient.java
@Override
public void run(Map<String, String> paramMap) throws Exception {
    // License check (disabled):
    // License.checkLicense();

    // Copy the parameters into the MR job configuration,
    // unescaping literal "\n" and "\r" sequences first
    Configuration conf = new Configuration();
    for (String key : paramMap.keySet()) {
        String value = paramMap.get(key);
        if (null != value) {
            value = value.replaceAll("\\\\n", "\n");
            value = value.replaceAll("\\\\r", "\r");
            conf.set(key, value);
            paramMap.put(key, value);
        }
    }

    String debug = paramMap.get(MRConfiguration.INTERNAL_JOB_LOG_DEBUG);
    if (null != debug) {
        String rownum = paramMap.get(MRConfiguration.INTERNAL_JOB_LOG_DEBUG_ROWNUM);
        conf.setInt(MRConfiguration.INTERNAL_JOB_LOG_DEBUG, Integer.parseInt(debug));
        conf.setInt(MRConfiguration.INTERNAL_JOB_LOG_DEBUG_ROWNUM, Integer.parseInt(rownum));
    }

    this.printParameter(paramMap);

    MRJOBService mrJobService = new MRJOBService();
    Job job = Job.getInstance(conf);
    job.setJarByClass(MRJOBService.class);
    mrJobService.run(paramMap, job);

    // if (mrJobService.isJobRun(conf)) {
    // } else {
    //     JobConf jobConf = new JobConf(conf, MRJOBService.class);
    //     mrJobService.run(paramMap, jobConf);
    // }
}
From source file:com.example.bigtable.sample.CellCounter.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    String reportSeparatorString = (args.length > 2) ? args[2] : ":";
    conf.set("ReportSeparator", reportSeparatorString);

    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(CellCounter.class);

    Scan scan = getConfiguredScanForJob(conf, args);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, CellCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(1);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setReducerClass(IntSumReducer.class);
    return job;
}
From source file:com.example.bigtable.sample.WordCountHBase.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount-hbase <in> [<in>...] <table-name>");
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }

    TableName tableName = TableName.valueOf(otherArgs[otherArgs.length - 1]);
    try {
        CreateTable.createTable(tableName, conf, Collections.singletonList(Bytes.toString(COLUMN_FAMILY)));
    } catch (Exception e) {
        LOG.error("Could not create the table.", e);
    }

    job.setJarByClass(WordCountHBase.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setMapOutputValueClass(IntWritable.class);

    TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), MyTableReducer.class, job);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.example.Driver.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");
    job.setJarByClass(Driver.class);
    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("this jar requires at least 2 parameters: " + job.getJar()
                + " input_files output_directory");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    // job.setMapOutputKeyClass(Text.class);
    // job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());

    // When you are not running any Reducer:
    // job.setNumReduceTasks(0);
    logger.info("number of reduce tasks is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());
    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);
    return 0;
}
From source file:com.examples.ch03.ParseWeblogs_Ex_1.java
public int run(String[] args) throws Exception {
    Path inputPath = new Path("apache_clf.txt");
    Path outputPath = new Path("output");

    Configuration conf = getConf();
    Job weblogJob = Job.getInstance(conf);
    weblogJob.setJobName("Weblog Transformer");
    weblogJob.setJarByClass(getClass());
    weblogJob.setNumReduceTasks(0);
    weblogJob.setMapperClass(CLFMapper_Ex_1.class);
    weblogJob.setMapOutputKeyClass(Text.class);
    weblogJob.setMapOutputValueClass(Text.class);
    weblogJob.setOutputKeyClass(Text.class);
    weblogJob.setOutputValueClass(Text.class);
    weblogJob.setInputFormatClass(TextInputFormat.class);
    weblogJob.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(weblogJob, inputPath);
    FileOutputFormat.setOutputPath(weblogJob, outputPath);

    if (weblogJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:com.facebook.hiveio.mapreduce.output.WritingTool.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    handleCommandLine(args, conf);
    HadoopUtils.setMapAttempts(conf, 1);
    adjustConfigurationForHive(conf);
    HiveTools.setupJob(conf);

    Job job = new Job(conf, "hive-io-writing");
    // Only set the jar by class if no jar was already configured
    if (job.getJar() == null) {
        job.setJarByClass(getClass());
    }
    job.setMapperClass(SampleMapper.class);
    job.setInputFormatClass(SampleInputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(HiveWritableRecord.class);
    job.setOutputFormatClass(SampleOutputFormat.class);
    job.setNumReduceTasks(0);

    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.fanlehai.hadoop.join.CompositeJoin.java
License:Apache License
/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException When there are communication problems with the job tracker.
 */
@SuppressWarnings("rawtypes")
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String join_reduces = conf.get(REDUCES_PER_HOST);
    if (join_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(join_reduces);
    }

    Job job = Job.getInstance(conf);
    job.setJobName("join");
    job.setJarByClass(CompositeJoin.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    Class<? extends InputFormat> inputFormatClass = KeyValueTextInputFormat.class; // or SequenceFileInputFormat.class
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = Text.class; // or BytesWritable.class
    Class<? extends Writable> outputValueClass = Text.class; // or TupleWritable.class
    String op = "inner";

    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    job.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }

    String strOut = otherArgs.remove(otherArgs.size() - 1);
    FileSystem.get(new Configuration()).delete(new Path(strOut), true);
    FileOutputFormat.setOutputPath(job, new Path(strOut));

    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    job.setInputFormatClass(CompositeInputFormat.class);
    job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR,
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    job.setOutputFormatClass(outputFormatClass);
    job.setMapperClass(MapComposite.class);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:com.fanlehai.hadoop.serialize.avro.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }

    FileSystem.get(new Configuration()).delete(new Path(args[1]), true);

    Job job = Job.getInstance(super.getConf(), "AvroWordCount");
    job.setJarByClass(MapReduceAvroWordCount.class);
    job.setJobName("AvroWordCount");

    // We call setOutputSchema first so we can override the configuration
    // parameters it sets
    AvroJob.setOutputKeySchema(job,
            Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT)));
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setSortComparatorClass(Text.Comparator.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Return 0 on success to follow the Tool convention
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.fanlehai.hadoop.serialize.avro.MapReduceColorCount.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }

    FileSystem.get(new Configuration()).delete(new Path(args[1]), true);

    Job job = Job.getInstance(super.getConf(), "MapReduceColorCount");
    job.setJarByClass(MapReduceColorCount.class);
    job.setJobName("Color Count");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(ColorCountMapper.class);
    AvroJob.setInputKeySchema(job, User.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    // Return 0 on success to follow the Tool convention
    return job.waitForCompletion(true) ? 0 : 1;
}