List of usage examples for org.apache.hadoop.mapreduce.Job#setNumReduceTasks
public void setNumReduceTasks(int tasks) throws IllegalStateException
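setNumReduceTasks(int tasks) sets the number of reduce tasks for the job; it must be called before the job is submitted, otherwise it throws IllegalStateException. Passing 0 produces a map-only job in which mapper output is written directly by the output format. Before the project examples below, here is a minimal, self-contained sketch; the class name NumReduceTasksDemo is illustrative, and the identity Mapper/Reducer base classes stand in for real application logic:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class NumReduceTasksDemo {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "numReduceTasks demo");
    job.setJarByClass(NumReduceTasksDemo.class);
    job.setMapperClass(Mapper.class);    // identity mapper (pass-through)
    job.setReducerClass(Reducer.class);  // identity reducer (pass-through)
    // Default TextInputFormat emits LongWritable offsets and Text lines
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // n > 0 requests n reduce tasks (here: 4 partitions of sorted output);
    // 0 skips the shuffle/reduce phase entirely, making a map-only job.
    // Must be set before submission: once the job has been submitted,
    // setNumReduceTasks throws IllegalStateException.
    job.setNumReduceTasks(4);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Changing the argument of setNumReduceTasks from 4 to 0 removes the sort/shuffle phase entirely, which is what most of the examples below do.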
From source file:de.l3s.content.timex.extracting.ClueWeb09TimexWriteToHDFS.java
License:Apache License
/** Runs this tool. */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
  Options options = new Options();
  options.addOption(
      OptionBuilder.withArgName("input").hasArg().withDescription("input path").create(INPUT_OPTION));
  options.addOption(
      OptionBuilder.withArgName("output").hasArg().withDescription("output path").create(OUTPUT_OPTION));

  CommandLine cmdline;
  CommandLineParser parser = new GnuParser();
  cmdline = parser.parse(options, args);

  if (!cmdline.hasOption(INPUT_OPTION)) {
    HelpFormatter formatter = new HelpFormatter();
    formatter.printHelp(this.getClass().getName(), options);
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }
  if (!cmdline.hasOption(OUTPUT_OPTION)) {
    HelpFormatter formatter = new HelpFormatter();
    formatter.printHelp(this.getClass().getName(), options);
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  String input = cmdline.getOptionValue(INPUT_OPTION);
  String output = cmdline.getOptionValue(OUTPUT_OPTION);

  LOG.info("Tool name: " + ClueWeb09TimexWriteToHDFS.class.getSimpleName());
  LOG.info(" - input: " + input);
  LOG.info(" - output: " + output);

  Configuration conf = new Configuration();
  long milliSeconds = 10000 * 60 * 60; // 36,000,000 ms (~10 hours; the default is 600,000 ms)
  conf.setLong("mapred.task.timeout", milliSeconds);

  Job job = Job.getInstance(conf, "extract CW tempex and output to HDFS");
  job.setJarByClass(ClueWeb09TimexWriteToHDFS.class);
  job.setNumReduceTasks(0);

  job.setInputFormatClass(ClueWeb09InputFormat.class);
  job.setMapperClass(TMapper.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  FileInputFormat.addInputPath(job, new Path(input));
  FileOutputFormat.setOutputPath(job, new Path(output));

  job.waitForCompletion(true);
  return 0;
}
From source file:de.l3s.content.timex.extracting.WikiTimex.java
License:Apache License
@SuppressWarnings("static-access") @Override// w ww.j av a 2s .c om public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION)); options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg() .withDescription("two-letter language code").create(LANGUAGE_OPTION)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String language = "en"; // Assume 'en' by default. if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (language.length() != 2) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } String inputPath = cmdline.getOptionValue(INPUT_OPTION); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - XML dump file: " + inputPath); LOG.info(" - language: " + language); Job job = Job.getInstance(getConf()); job.setJarByClass(WikiTimex.class); job.setJobName(String.format("CountWikipediaPages[%s: %s, %s: %s]", INPUT_OPTION, inputPath, LANGUAGE_OPTION, language)); job.setNumReduceTasks(0); FileInputFormat.setInputPaths(job, new Path(inputPath)); if (language != null) { job.getConfiguration().set("wiki.language", language); } job.setInputFormatClass(WikipediaPageInputFormat.class); job.setOutputFormatClass(NullOutputFormat.class); job.setMapperClass(TMapper.class); job.waitForCompletion(true); return 0; }
From source file:de.tuberlin.dima.aim3.HadoopJob.java
License:Open Source License
protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
    Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
    Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat) throws IOException {

  Job job = new Job(new Configuration(getConf()));
  Configuration jobConf = job.getConfiguration();

  if (mapper.equals(Mapper.class)) {
    throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
  } else {
    job.setJarByClass(mapper);
  }

  job.setInputFormatClass(inputFormat);
  jobConf.set("mapred.input.dir", inputPath.toString());

  job.setMapperClass(mapper);
  job.setMapOutputKeyClass(mapperKey);
  job.setMapOutputValueClass(mapperValue);
  job.setOutputKeyClass(mapperKey);
  job.setOutputValueClass(mapperValue);
  jobConf.setBoolean("mapred.compress.map.output", true);
  job.setNumReduceTasks(0);

  job.setJobName(getCustomJobName(job, mapper));

  job.setOutputFormatClass(outputFormat);
  jobConf.set("mapred.output.dir", outputPath.toString());

  return job;
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java
License:Apache License
/** pass1: generate collocations, ngrams */
private static long generateCollocations(Path input, Path output, Configuration baseConf,
    boolean emitUnigrams, int maxNGramSize, int reduceTasks, int minSupport, Window mode, int winsize)
    throws IOException, ClassNotFoundException, InterruptedException {

  Configuration con = new Configuration(baseConf);
  con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
  con.setInt(CollocMapper.MAX_SHINGLE_SIZE, maxNGramSize);
  con.setInt(CollocReducer.MIN_SUPPORT, minSupport);
  con.set(WINDOW_TYPE, mode.toString());
  con.setInt(WINDOW_SIZE, winsize);

  if (mode.toString().equalsIgnoreCase("DOCUMENT")) {
    con.setInt("mapred.job.map.memory.mb", 3000);
    con.set("mapred.child.java.opts", "-Xmx2900M");
    con.set("mapred.reduce.child.java.opts", "-Xmx8000M");
    con.setInt("mapred.job.reduce.memory.mb", 8120);
  } else {
    con.setInt("mapred.job.map.memory.mb", 2000);
    con.set("mapred.child.java.opts", "-Xmx1900M");
    con.set("mapred.reduce.child.java.opts", "-Xmx2900M");
    con.setInt("mapred.job.reduce.memory.mb", 3000);
  }
  con.setBoolean("mapred.compress.map.output", true);
  con.setStrings("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
  con.setBoolean("mapred.compress.output", true);
  con.setStrings("mapred.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
  con.setInt("mapred.task.timeout", 6000000);
  con.setInt("io.sort.factor", 50);
  con.setInt("mapreduce.map.tasks", 256);
  con.setInt("dfs.replication", 1);

  Job job = new Job(con);
  job.setJobName(CollocDriver.class.getSimpleName() + ".generateCollocations:" + input);
  job.setJarByClass(CollocDriver.class);

  job.setMapOutputKeyClass(GramKey.class);
  job.setMapOutputValueClass(Gram.class);
  job.setPartitionerClass(GramKeyPartitioner.class);
  job.setGroupingComparatorClass(GramKeyGroupComparator.class);

  job.setOutputKeyClass(Gram.class);
  job.setOutputValueClass(Gram.class);

  job.setCombinerClass(CollocCombiner.class);

  FileInputFormat.setInputPaths(job, input);

  Path outputPath = new Path(output, SUBGRAM_OUTPUT_DIRECTORY);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(CollocMapper.class);

  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setReducerClass(CollocReducer.class);
  job.setNumReduceTasks(512);

  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    throw new IllegalStateException("Job failed!");
  }

  return job.getCounters().findCounter(CollocMapper.Count.NGRAM_TOTAL).getValue();
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java
License:Apache License
/** pass2: perform the LLR calculation */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
    boolean emitUnigrams, float minValue, int reduceTasks)
    throws IOException, InterruptedException, ClassNotFoundException {

  Configuration conf = new Configuration(baseConf);
  conf.setLong(AssocReducer.NGRAM_TOTAL, nGramTotal);
  conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
  conf.setFloat(AssocReducer.MIN_VALUE, minValue);
  conf.setInt("mapred.job.map.memory.mb", 1280);
  conf.setInt("mapred.job.reduce.memory.mb", 2560);
  conf.set("mapred.reduce.child.java.opts", "-Xmx2G");
  conf.setInt("mapred.task.timeout", 6000000);
  conf.set(AssocReducer.ASSOC_METRIC, "llr");

  Job job = new Job(conf);
  job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output + " pruning: " + minValue);
  job.setJarByClass(CollocDriver.class);

  job.setMapOutputKeyClass(Gram.class);
  job.setMapOutputValueClass(Gram.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
  Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY + "_llr");
  FileOutputFormat.setOutputPath(job, outPath);

  job.setMapperClass(Mapper.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class);
  job.setReducerClass(AssocReducer.class);
  job.setNumReduceTasks(reduceTasks);

  // Defines additional single text based output 'text' for the job
  MultipleOutputs.addNamedOutput(job, "contingency", TextOutputFormat.class, Text.class, Text.class);

  // Defines additional multi sequencefile based output 'sequence' for the job
  MultipleOutputs.addNamedOutput(job, "llr", TextOutputFormat.class, Text.class, DoubleWritable.class);
  MultipleOutputs.addNamedOutput(job, "pmi", TextOutputFormat.class, Text.class, DoubleWritable.class);
  MultipleOutputs.addNamedOutput(job, "chi", TextOutputFormat.class, Text.class, DoubleWritable.class);
  MultipleOutputs.addNamedOutput(job, "dice", TextOutputFormat.class, Text.class, DoubleWritable.class);

  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    throw new IllegalStateException("Job failed!");
  }
}
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.full.Phase3Step3NearDupTuplesCreation.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());

  job.setJarByClass(Phase3Step3NearDupTuplesCreation.class);
  job.setJobName(Phase3Step3NearDupTuplesCreation.class.getName());

  // mapper
  job.setMapperClass(CreateTuplesMapper.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(TreeSet.class);

  job.setInputFormatClass(TextInputFormat.class);
  LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

  // paths
  String commaSeparatedInputFiles = args[0];
  String outputPath = args[1];

  FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  job.setNumReduceTasks(0); // map-only job: without this, an identity reduce phase would run

  return job.waitForCompletion(true) ? 0 : 1;
}
From source file:diamondmapreduce.DiamondMapReduce.java
License:Apache License
int launchHamond(String[] arguments) throws Exception {

  // extract diamond, query, reference and output from array
  String diamond = arguments[0];
  String query = arguments[1];
  String dataBase = arguments[2];
  String outPut = arguments[3];

  // set Hadoop configuration
  Job job = Job.getInstance(getConf(), "DIAMOND");
  Configuration conf = job.getConfiguration();
  SetConf.setHadoopConf(conf);

  // get user name
  userName = HadoopUser.getHadoopUser();

  // delete all existing DIAMOND files under current Hadoop user
  DeleteHDFSFiles.deleteAllFiles(userName);

  // make Hamond directory on HDFS
  MakeHamondHDFSdir.makedir(conf, userName);

  // make DIAMOND database on local then copy to HDFS with query and delete local database
  MakeDB.makeDB(diamond, dataBase);

  // copy DIAMOND bin, query and local database file to HDFS
  CopyFromLocal.copyFromLocal(conf, diamond, query, dataBase, userName);

  // pass query name and database name to mappers
  conf.set(QUERY, query);
  conf.set(DATABASE, dataBase + ".dmnd");
  String[] subArgs = Arrays.copyOfRange(arguments, 4, arguments.length);
  conf.setStrings("DIAMOND-arguments", subArgs);
  conf.setStrings(OUTPUT, outPut);

  // add DIAMOND bin and database into distributed cache
  job.addCacheFile(new URI("/user/" + userName + "/Hamond/diamond"));
  job.addCacheFile(new URI("/user/" + userName + "/Hamond/" + new Path(dataBase).getName() + ".dmnd"));

  // set job input and output paths
  FileInputFormat.addInputPath(job, new Path("/user/" + userName + "/Hamond/" + new Path(query).getName()));
  FileOutputFormat.setOutputPath(job, new Path("/user/" + userName + "/Hamond/out"));

  // set job driver and mapper
  job.setJarByClass(DiamondMapReduce.class);
  job.setMapperClass(DiamondMapper.class);

  // set job input format into customized multilines format
  job.setInputFormatClass(CustomNLineFileInputFormat.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setNumReduceTasks(0);

  return job.waitForCompletion(true) ? 0 : 1;
}
From source file:diamondmapreduce.DiamondMapReduce.java
License:Apache License
int launchHamondAWS(String[] arguments) throws Exception {

  // extract diamond, query, reference and output from array
  String diamond = arguments[0];
  String query = arguments[1];
  String dataBase = arguments[2];
  String outPut = arguments[3];

  // set Hadoop configuration
  Job job = Job.getInstance(getConf(), "DIAMOND");
  Configuration conf = job.getConfiguration();
  SetConf.setHadoopConf(conf);

  // get user name
  userName = HadoopUser.getHadoopUser();

  // delete all existing DIAMOND files under current Hadoop user
  DeleteHDFSFiles.deleteAllFiles(userName);

  // make local Hamond dir
  awshamondsidefunctions.MakeHamondDir.make();

  // copy DIAMOND, query, reference from S3 to master local
  awshamondsidefunctions.CopyFromS3.copyFromS3(diamond, query, dataBase);

  // make Hamond directory on HDFS
  MakeHamondHDFSdir.makedir(conf, userName);

  // make DIAMOND database on local then copy to HDFS with query and delete local database
  MakeDB.makeDB("/mnt/Hamond/diamond", "/mnt/Hamond/" + new Path(dataBase).getName());

  // copy DIAMOND bin, query and local database file to HDFS
  CopyFromLocal.copyFromLocal(conf, "/mnt/Hamond/diamond", "/mnt/Hamond/" + new Path(query).getName(),
      "/mnt/Hamond/" + new Path(dataBase).getName(), userName);

  // pass query name and database name to mappers
  conf.set(QUERY, query);
  conf.set(DATABASE, dataBase);
  conf.set(OUTPUT, outPut);
  String[] subArgs = Arrays.copyOfRange(arguments, 4, arguments.length);
  conf.setStrings("DIAMOND-arguments", subArgs);
  conf.setStrings(OUTPUT, outPut);

  // add DIAMOND bin and database into distributed cache
  job.addCacheFile(new URI("/user/" + userName + "/Hamond/diamond"));
  job.addCacheFile(new URI("/user/" + userName + "/Hamond/" + new Path(dataBase).getName() + ".dmnd"));

  // set job input and output paths
  FileInputFormat.addInputPath(job, new Path("/user/" + userName + "/Hamond/" + new Path(query).getName()));
  FileOutputFormat.setOutputPath(job, new Path("/user/" + userName + "/Hamond/out"));

  // set job driver, mapper and reducer
  job.setJarByClass(DiamondMapReduce.class);
  job.setMapperClass(DiamondMapper.class);
  job.setReducerClass(AWSDiamondReducer.class);

  // set job input format into customized multilines format
  job.setInputFormatClass(CustomNLineFileInputFormat.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setNumReduceTasks(1);

  return job.waitForCompletion(true) ? 0 : 1;
}
From source file:distributed.hadoop.MapReduceJobConfig.java
License:Open Source License
/**
 * Apply the settings encapsulated in this config and return a Job object
 * ready for execution.
 *
 * @param jobName the name of the job
 * @param conf the Configuration object that will be wrapped in the Job
 * @param env environment variables
 * @return a configured Job object
 * @throws IOException if a problem occurs
 * @throws ClassNotFoundException if various classes are not found
 */
public Job configureForHadoop(String jobName, Configuration conf, Environment env)
    throws IOException, ClassNotFoundException {

  String jobTrackerPort = getJobTrackerPort();
  if (DistributedJobConfig.isEmpty(jobTrackerPort)) {
    jobTrackerPort = AbstractHadoopJobConfig.isHadoop2() ? AbstractHadoopJobConfig.DEFAULT_PORT_YARN
        : AbstractHadoopJobConfig.DEFAULT_PORT;
  }
  String jobTracker = getJobTrackerHost() + ":" + jobTrackerPort;
  if (DistributedJobConfig.isEmpty(jobTracker)) {
    System.err.println("No " + (AbstractHadoopJobConfig.isHadoop2() ? "resource manager " : "JobTracker ")
        + "set - running locally...");
  } else {
    jobTracker = environmentSubstitute(jobTracker, env);
    if (AbstractHadoopJobConfig.isHadoop2()) {
      conf.set(YARN_RESOURCE_MANAGER_ADDRESS, jobTracker);
      conf.set(YARN_RESOURCE_MANAGER_SCHEDULER_ADDRESS,
          environmentSubstitute(getJobTrackerHost(), env) + ":8030");
    } else {
      conf.set(HADOOP_JOB_TRACKER_HOST, jobTracker);
    }
  }
  System.err.println("Using " + (AbstractHadoopJobConfig.isHadoop2() ? "resource manager: " : "jobtracker: ")
      + jobTracker);

  if (AbstractHadoopJobConfig.isHadoop2()) {
    // a few other properties needed to run against Yarn
    conf.set("yarn.nodemanager.aux-services", "mapreduce_shuffle");
    conf.set("mapreduce.framework.name", "yarn");
  }

  if (!DistributedJobConfig.isEmpty(getMapredMaxSplitSize())) {
    conf.set(AbstractHadoopJobConfig.isHadoop2() ? HADOOP2_MAPRED_MAX_SPLIT_SIZE
        : HADOOP_MAPRED_MAX_SPLIT_SIZE, getMapredMaxSplitSize());
  }

  // Do any user supplied properties here before creating the Job
  for (Map.Entry<String, String> e : m_additionalUserSuppliedProperties.entrySet()) {
    conf.set(e.getKey(), e.getValue());
  }

  m_hdfsConfig.configureForHadoop(conf, env);

  Job job = new Job(conf, jobName);

  String numMappers = getNumberOfMaps();
  if (!DistributedJobConfig.isEmpty(numMappers)) {
    numMappers = environmentSubstitute(numMappers, env);
    ((JobConf) job.getConfiguration()).setNumMapTasks(Integer.parseInt(numMappers));
  }

  // The number of map tasks that will be run simultaneously by a task tracker
  String maxConcurrentMapTasks = getTaskTrackerMapTasksMaximum();
  if (!DistributedJobConfig.isEmpty(maxConcurrentMapTasks)) {
    ((JobConf) job.getConfiguration()).set("mapred.tasktracker.map.tasks.maximum", maxConcurrentMapTasks);
  }

  String numReducers = getNumberOfReducers();
  if (!DistributedJobConfig.isEmpty(numReducers)) {
    numReducers = environmentSubstitute(numReducers, env);
    job.setNumReduceTasks(Integer.parseInt(numReducers));

    if (Integer.parseInt(numReducers) == 0) {
      System.err.println("Warning - no reducer class set. Configuring for a map only job");
    }
  } else {
    job.setNumReduceTasks(1);
  }

  String mapperClass = getMapperClass();
  if (DistributedJobConfig.isEmpty(mapperClass)) {
    throw new IOException("No mapper class specified!");
  }
  mapperClass = environmentSubstitute(mapperClass, env);

  @SuppressWarnings("unchecked")
  Class<? extends Mapper> mc = (Class<? extends Mapper>) Class.forName(mapperClass);
  job.setMapperClass(mc);

  // use the Job's reduce-task count here rather than re-parsing numReducers,
  // which may be empty when the default of 1 was applied above
  String reducerClass = getReducerClass();
  if (DistributedJobConfig.isEmpty(reducerClass) && job.getNumReduceTasks() > 0) {
    throw new IOException("No reducer class specified!");
  } else if (job.getNumReduceTasks() > 0) {
    reducerClass = environmentSubstitute(reducerClass, env);

    @SuppressWarnings("unchecked")
    Class<? extends Reducer> rc = (Class<? extends Reducer>) Class.forName(reducerClass);
    job.setReducerClass(rc);
  }

  String combinerClass = getCombinerClass();
  if (!DistributedJobConfig.isEmpty(combinerClass)) {
    combinerClass = environmentSubstitute(combinerClass, env);

    @SuppressWarnings("unchecked")
    Class<? extends Reducer> cc = (Class<? extends Reducer>) Class.forName(combinerClass);
    job.setCombinerClass(cc);
  }

  String inputFormatClass = getInputFormatClass();
  if (DistributedJobConfig.isEmpty(inputFormatClass)) {
    throw new IOException("No input format class specified");
  }
  inputFormatClass = environmentSubstitute(inputFormatClass, env);

  @SuppressWarnings("unchecked")
  Class<? extends InputFormat> ifc = (Class<? extends InputFormat>) Class.forName(inputFormatClass);
  job.setInputFormatClass(ifc);

  String outputFormatClass = getOutputFormatClass();
  if (DistributedJobConfig.isEmpty(outputFormatClass)) {
    throw new IOException("No output format class specified");
  }
  outputFormatClass = environmentSubstitute(outputFormatClass, env);

  @SuppressWarnings("unchecked")
  Class<? extends OutputFormat> ofc = (Class<? extends OutputFormat>) Class.forName(outputFormatClass);
  job.setOutputFormatClass(ofc);

  String mapOutputKeyClass = getMapOutputKeyClass();
  if (DistributedJobConfig.isEmpty(mapOutputKeyClass)) {
    throw new IOException("No map output key class defined");
  }
  mapOutputKeyClass = environmentSubstitute(mapOutputKeyClass, env);
  Class mokc = Class.forName(mapOutputKeyClass);
  job.setMapOutputKeyClass(mokc);

  String mapOutputValueClass = getMapOutputValueClass();
  if (DistributedJobConfig.isEmpty(mapOutputValueClass)) {
    throw new IOException("No map output value class defined");
  }
  mapOutputValueClass = environmentSubstitute(mapOutputValueClass, env);
  Class movc = Class.forName(mapOutputValueClass);
  job.setMapOutputValueClass(movc);

  String outputKeyClass = getOutputKeyClass();
  if (DistributedJobConfig.isEmpty(outputKeyClass)) {
    throw new IOException("No output key class defined");
  }
  outputKeyClass = environmentSubstitute(outputKeyClass, env);
  Class okc = Class.forName(outputKeyClass);
  job.setOutputKeyClass(okc);

  String outputValueClass = getOutputValueClass();
  if (DistributedJobConfig.isEmpty(outputValueClass)) {
    throw new IOException("No output value class defined");
  }
  outputValueClass = environmentSubstitute(outputValueClass, env);
  Class ovc = Class.forName(outputValueClass);
  job.setOutputValueClass(ovc);

  String inputPaths = getInputPaths();
  // don't complain if there aren't any, as inputs such as HBASE
  // require other properties to be set
  if (!DistributedJobConfig.isEmpty(inputPaths)) {
    inputPaths = environmentSubstitute(inputPaths, env);
    FileInputFormat.setInputPaths(job, inputPaths);
  }

  String outputPath = getOutputPath();
  if (DistributedJobConfig.isEmpty(outputPath)) {
    throw new IOException("No output path specified");
  }
  outputPath = environmentSubstitute(outputPath, env);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  return job;
}
From source file:dz.lab.mapred.hbase.custom_output.StartsWithCountJob_HBase.java
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf(), "StartsWithCount-HBase");
  job.setJarByClass(getClass());

  Scan scan = new Scan();
  scan.addColumn(toBytes(FAMILY), toBytes(INPUT_COLUMN));

  // set up job with hbase utils
  TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, StartsWithCountMapper_HBase.class, Text.class,
      IntWritable.class, job);
  TableMapReduceUtil.initTableReducerJob(TABLE_NAME, StartsWithCountReducer_HBase.class, job);
  job.setNumReduceTasks(1);

  return job.waitForCompletion(true) ? 0 : 1;
}