List of usage examples for org.apache.hadoop.mapreduce.Job#setNumReduceTasks
public void setNumReduceTasks(int tasks) throws IllegalStateException
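setNumReduceTasks(int tasks) sets the number of reduce tasks for the job; it must be called before the job is submitted, otherwise it throws IllegalStateException. Passing 0 produces a map-only job in which mapper output is written directly by the output format. Before the project examples below, here is a minimal, self-contained sketch; the class name NumReduceTasksDemo is illustrative, and the identity Mapper/Reducer base classes stand in for real application logic:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class NumReduceTasksDemo {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "numReduceTasks demo");
    job.setJarByClass(NumReduceTasksDemo.class);
    job.setMapperClass(Mapper.class);    // identity mapper (pass-through)
    job.setReducerClass(Reducer.class);  // identity reducer (pass-through)
    // Default TextInputFormat emits LongWritable offsets and Text lines
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // n > 0 requests n reduce tasks (here: 4 partitions of sorted output);
    // 0 skips the shuffle/reduce phase entirely, making a map-only job.
    // Must be set before submission: once the job has been submitted,
    // setNumReduceTasks throws IllegalStateException.
    job.setNumReduceTasks(4);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Changing the argument of setNumReduceTasks from 4 to 0 removes the sort/shuffle phase entirely, which is what most of the examples below do.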
From source file:de.l3s.content.timex.extracting.ClueWeb09TimexWriteToHDFS.java
License:Apache License
/** Runs this tool. */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
  Options options = new Options();
  options.addOption(
      OptionBuilder.withArgName("input").hasArg().withDescription("input path").create(INPUT_OPTION));
  options.addOption(
      OptionBuilder.withArgName("output").hasArg().withDescription("output path").create(OUTPUT_OPTION));

  CommandLine cmdline;
  CommandLineParser parser = new GnuParser();
  cmdline = parser.parse(options, args);

  if (!cmdline.hasOption(INPUT_OPTION)) {
    HelpFormatter formatter = new HelpFormatter();
    formatter.printHelp(this.getClass().getName(), options);
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }
  if (!cmdline.hasOption(OUTPUT_OPTION)) {
    HelpFormatter formatter = new HelpFormatter();
    formatter.printHelp(this.getClass().getName(), options);
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  String input = cmdline.getOptionValue(INPUT_OPTION);
  String output = cmdline.getOptionValue(OUTPUT_OPTION);

  LOG.info("Tool name: " + ClueWeb09TimexWriteToHDFS.class.getSimpleName());
  LOG.info(" - input: " + input);
  LOG.info(" - output: " + output);

  Configuration conf = new Configuration();
  long milliSeconds = 10000 * 60 * 60; // 36,000,000 ms (~10 hours; the default is 600,000 ms)
  conf.setLong("mapred.task.timeout", milliSeconds);

  Job job = Job.getInstance(conf, "extract CW tempex and output to HDFS");
  job.setJarByClass(ClueWeb09TimexWriteToHDFS.class);
  job.setNumReduceTasks(0);

  job.setInputFormatClass(ClueWeb09InputFormat.class);
  job.setMapperClass(TMapper.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  FileInputFormat.addInputPath(job, new Path(input));
  FileOutputFormat.setOutputPath(job, new Path(output));

  job.waitForCompletion(true);
  return 0;
}
From source file:de.l3s.content.timex.extracting.WikiTimex.java
License:Apache License
@SuppressWarnings("static-access") @Override// w ww.j av a 2s .c om public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION)); options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg() .withDescription("two-letter language code").create(LANGUAGE_OPTION)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String language = "en"; // Assume 'en' by default. if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (language.length() != 2) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } String inputPath = cmdline.getOptionValue(INPUT_OPTION); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - XML dump file: " + inputPath); LOG.info(" - language: " + language); Job job = Job.getInstance(getConf()); job.setJarByClass(WikiTimex.class); job.setJobName(String.format("CountWikipediaPages[%s: %s, %s: %s]", INPUT_OPTION, inputPath, LANGUAGE_OPTION, language)); job.setNumReduceTasks(0); FileInputFormat.setInputPaths(job, new Path(inputPath)); if (language != null) { job.getConfiguration().set("wiki.language", language); } job.setInputFormatClass(WikipediaPageInputFormat.class); job.setOutputFormatClass(NullOutputFormat.class); job.setMapperClass(TMapper.class); job.waitForCompletion(true); return 0; }
From source file:de.tuberlin.dima.aim3.HadoopJob.java
License:Open Source License
protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
    Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
    Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat) throws IOException {

  Job job = new Job(new Configuration(getConf()));
  Configuration jobConf = job.getConfiguration();

  if (mapper.equals(Mapper.class)) {
    throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
  } else {
    job.setJarByClass(mapper);
  }

  job.setInputFormatClass(inputFormat);
  jobConf.set("mapred.input.dir", inputPath.toString());

  job.setMapperClass(mapper);
  job.setMapOutputKeyClass(mapperKey);
  job.setMapOutputValueClass(mapperValue);
  job.setOutputKeyClass(mapperKey);
  job.setOutputValueClass(mapperValue);
  jobConf.setBoolean("mapred.compress.map.output", true);
  job.setNumReduceTasks(0);

  job.setJobName(getCustomJobName(job, mapper));

  job.setOutputFormatClass(outputFormat);
  jobConf.set("mapred.output.dir", outputPath.toString());

  return job;
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java
License:Apache License
/** pass1: generate collocations, ngrams */
private static long generateCollocations(Path input, Path output, Configuration baseConf,
    boolean emitUnigrams, int maxNGramSize, int reduceTasks, int minSupport, Window mode, int winsize)
    throws IOException, ClassNotFoundException, InterruptedException {

  Configuration con = new Configuration(baseConf);
  con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
  con.setInt(CollocMapper.MAX_SHINGLE_SIZE, maxNGramSize);
  con.setInt(CollocReducer.MIN_SUPPORT, minSupport);
  con.set(WINDOW_TYPE, mode.toString());
  con.setInt(WINDOW_SIZE, winsize);

  if (mode.toString().equalsIgnoreCase("DOCUMENT")) {
    con.setInt("mapred.job.map.memory.mb", 3000);
    con.set("mapred.child.java.opts", "-Xmx2900M");
    con.set("mapred.reduce.child.java.opts", "-Xmx8000M");
    con.setInt("mapred.job.reduce.memory.mb", 8120);
  } else {
    con.setInt("mapred.job.map.memory.mb", 2000);
    con.set("mapred.child.java.opts", "-Xmx1900M");
    con.set("mapred.reduce.child.java.opts", "-Xmx2900M");
    con.setInt("mapred.job.reduce.memory.mb", 3000);
  }
  con.setBoolean("mapred.compress.map.output", true);
  con.setStrings("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
  con.setBoolean("mapred.compress.output", true);
  con.setStrings("mapred.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
  con.setInt("mapred.task.timeout", 6000000);
  con.setInt("io.sort.factor", 50);
  con.setInt("mapreduce.map.tasks", 256);
  con.setInt("dfs.replication", 1);

  Job job = new Job(con);
  job.setJobName(CollocDriver.class.getSimpleName() + ".generateCollocations:" + input);
  job.setJarByClass(CollocDriver.class);

  job.setMapOutputKeyClass(GramKey.class);
  job.setMapOutputValueClass(Gram.class);
  job.setPartitionerClass(GramKeyPartitioner.class);
  job.setGroupingComparatorClass(GramKeyGroupComparator.class);

  job.setOutputKeyClass(Gram.class);
  job.setOutputValueClass(Gram.class);

  job.setCombinerClass(CollocCombiner.class);

  FileInputFormat.setInputPaths(job, input);

  Path outputPath = new Path(output, SUBGRAM_OUTPUT_DIRECTORY);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(CollocMapper.class);

  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setReducerClass(CollocReducer.class);
  job.setNumReduceTasks(512);

  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    throw new IllegalStateException("Job failed!");
  }

  return job.getCounters().findCounter(CollocMapper.Count.NGRAM_TOTAL).getValue();
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java
License:Apache License
/** pass2: perform the LLR calculation */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
    boolean emitUnigrams, float minValue, int reduceTasks)
    throws IOException, InterruptedException, ClassNotFoundException {

  Configuration conf = new Configuration(baseConf);
  conf.setLong(AssocReducer.NGRAM_TOTAL, nGramTotal);
  conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
  conf.setFloat(AssocReducer.MIN_VALUE, minValue);
  conf.setInt("mapred.job.map.memory.mb", 1280);
  conf.setInt("mapred.job.reduce.memory.mb", 2560);
  conf.set("mapred.reduce.child.java.opts", "-Xmx2G");
  conf.setInt("mapred.task.timeout", 6000000);
  conf.set(AssocReducer.ASSOC_METRIC, "llr");

  Job job = new Job(conf);
  job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output + " pruning: " + minValue);
  job.setJarByClass(CollocDriver.class);

  job.setMapOutputKeyClass(Gram.class);
  job.setMapOutputValueClass(Gram.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
  Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY + "_llr");
  FileOutputFormat.setOutputPath(job, outPath);

  job.setMapperClass(Mapper.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class);
  job.setReducerClass(AssocReducer.class);
  job.setNumReduceTasks(reduceTasks);

  // Defines additional single text based output 'text' for the job
  MultipleOutputs.addNamedOutput(job, "contingency", TextOutputFormat.class, Text.class, Text.class);

  // Defines additional multi sequencefile based output 'sequence' for the job
  MultipleOutputs.addNamedOutput(job, "llr", TextOutputFormat.class, Text.class, DoubleWritable.class);
  MultipleOutputs.addNamedOutput(job, "pmi", TextOutputFormat.class, Text.class, DoubleWritable.class);
  MultipleOutputs.addNamedOutput(job, "chi", TextOutputFormat.class, Text.class, DoubleWritable.class);
  MultipleOutputs.addNamedOutput(job, "dice", TextOutputFormat.class, Text.class, DoubleWritable.class);

  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    throw new IllegalStateException("Job failed!");
  }
}
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.full.Phase3Step3NearDupTuplesCreation.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());

  job.setJarByClass(Phase3Step3NearDupTuplesCreation.class);
  job.setJobName(Phase3Step3NearDupTuplesCreation.class.getName());

  // mapper
  job.setMapperClass(CreateTuplesMapper.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(TreeSet.class);

  job.setInputFormatClass(TextInputFormat.class);
  LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

  // paths
  String commaSeparatedInputFiles = args[0];
  String outputPath = args[1];

  FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  job.setNumReduceTasks(0); // map-only job: without this, an identity reduce phase would run

  return job.waitForCompletion(true) ? 0 : 1;
}
From source file:diamondmapreduce.DiamondMapReduce.java
License:Apache License
int launchHamond(String[] arguments) throws Exception {

  // extract diamond, query, reference and output from array
  String diamond = arguments[0];
  String query = arguments[1];
  String dataBase = arguments[2];
  String outPut = arguments[3];

  // set Hadoop configuration
  Job job = Job.getInstance(getConf(), "DIAMOND");
  Configuration conf = job.getConfiguration();
  SetConf.setHadoopConf(conf);

  // get user name
  userName = HadoopUser.getHadoopUser();

  // delete all existing DIAMOND files under current Hadoop user
  DeleteHDFSFiles.deleteAllFiles(userName);

  // make Hamond directory on HDFS
  MakeHamondHDFSdir.makedir(conf, userName);

  // make DIAMOND database on local then copy to HDFS with query and delete local database
  MakeDB.makeDB(diamond, dataBase);

  // copy DIAMOND bin, query and local database file to HDFS
  CopyFromLocal.copyFromLocal(conf, diamond, query, dataBase, userName);

  // pass query name and database name to mappers
  conf.set(QUERY, query);
  conf.set(DATABASE, dataBase + ".dmnd");
  String[] subArgs = Arrays.copyOfRange(arguments, 4, arguments.length);
  conf.setStrings("DIAMOND-arguments", subArgs);
  conf.setStrings(OUTPUT, outPut);

  // add DIAMOND bin and database into distributed cache
  job.addCacheFile(new URI("/user/" + userName + "/Hamond/diamond"));
  job.addCacheFile(new URI("/user/" + userName + "/Hamond/" + new Path(dataBase).getName() + ".dmnd"));

  // set job input and output paths
  FileInputFormat.addInputPath(job, new Path("/user/" + userName + "/Hamond/" + new Path(query).getName()));
  FileOutputFormat.setOutputPath(job, new Path("/user/" + userName + "/Hamond/out"));

  // set job driver and mapper
  job.setJarByClass(DiamondMapReduce.class);
  job.setMapperClass(DiamondMapper.class);

  // set job input format into customized multilines format
  job.setInputFormatClass(CustomNLineFileInputFormat.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setNumReduceTasks(0);

  return job.waitForCompletion(true) ? 0 : 1;
}
From source file:diamondmapreduce.DiamondMapReduce.java
License:Apache License
int launchHamondAWS(String[] arguments) throws Exception {

  // extract diamond, query, reference and output from array
  String diamond = arguments[0];
  String query = arguments[1];
  String dataBase = arguments[2];
  String outPut = arguments[3];

  // set Hadoop configuration
  Job job = Job.getInstance(getConf(), "DIAMOND");
  Configuration conf = job.getConfiguration();
  SetConf.setHadoopConf(conf);

  // get user name
  userName = HadoopUser.getHadoopUser();

  // delete all existing DIAMOND files under current Hadoop user
  DeleteHDFSFiles.deleteAllFiles(userName);

  // make local Hamond dir
  awshamondsidefunctions.MakeHamondDir.make();

  // copy DIAMOND, query, reference from S3 to master local
  awshamondsidefunctions.CopyFromS3.copyFromS3(diamond, query, dataBase);

  // make Hamond directory on HDFS
  MakeHamondHDFSdir.makedir(conf, userName);

  // make DIAMOND database on local then copy to HDFS with query and delete local database
  MakeDB.makeDB("/mnt/Hamond/diamond", "/mnt/Hamond/" + new Path(dataBase).getName());

  // copy DIAMOND bin, query and local database file to HDFS
  CopyFromLocal.copyFromLocal(conf, "/mnt/Hamond/diamond", "/mnt/Hamond/" + new Path(query).getName(),
      "/mnt/Hamond/" + new Path(dataBase).getName(), userName);

  // pass query name and database name to mappers
  conf.set(QUERY, query);
  conf.set(DATABASE, dataBase);
  conf.set(OUTPUT, outPut);
  String[] subArgs = Arrays.copyOfRange(arguments, 4, arguments.length);
  conf.setStrings("DIAMOND-arguments", subArgs);
  conf.setStrings(OUTPUT, outPut);

  // add DIAMOND bin and database into distributed cache
  job.addCacheFile(new URI("/user/" + userName + "/Hamond/diamond"));
  job.addCacheFile(new URI("/user/" + userName + "/Hamond/" + new Path(dataBase).getName() + ".dmnd"));

  // set job input and output paths
  FileInputFormat.addInputPath(job, new Path("/user/" + userName + "/Hamond/" + new Path(query).getName()));
  FileOutputFormat.setOutputPath(job, new Path("/user/" + userName + "/Hamond/out"));

  // set job driver, mapper and reducer
  job.setJarByClass(DiamondMapReduce.class);
  job.setMapperClass(DiamondMapper.class);
  job.setReducerClass(AWSDiamondReducer.class);

  // set job input format into customized multilines format
  job.setInputFormatClass(CustomNLineFileInputFormat.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setNumReduceTasks(1);

  return job.waitForCompletion(true) ? 0 : 1;
}
From source file:distributed.hadoop.MapReduceJobConfig.java
License:Open Source License
/**
 * Apply the settings encapsulated in this config and return a Job object
 * ready for execution.
 *
 * @param jobName the name of the job
 * @param conf the Configuration object that will be wrapped in the Job
 * @param env environment variables
 * @return a configured Job object
 * @throws IOException if a problem occurs
 * @throws ClassNotFoundException if various classes are not found
 */
public Job configureForHadoop(String jobName, Configuration conf, Environment env)
    throws IOException, ClassNotFoundException {

  String jobTrackerPort = getJobTrackerPort();
  if (DistributedJobConfig.isEmpty(jobTrackerPort)) {
    jobTrackerPort = AbstractHadoopJobConfig.isHadoop2() ? AbstractHadoopJobConfig.DEFAULT_PORT_YARN
        : AbstractHadoopJobConfig.DEFAULT_PORT;
  }
  String jobTracker = getJobTrackerHost() + ":" + jobTrackerPort;
  if (DistributedJobConfig.isEmpty(jobTracker)) {
    System.err.println("No " + (AbstractHadoopJobConfig.isHadoop2() ? "resource manager " : "JobTracker ")
        + "set - running locally...");
  } else {
    jobTracker = environmentSubstitute(jobTracker, env);
    if (AbstractHadoopJobConfig.isHadoop2()) {
      conf.set(YARN_RESOURCE_MANAGER_ADDRESS, jobTracker);
      conf.set(YARN_RESOURCE_MANAGER_SCHEDULER_ADDRESS,
          environmentSubstitute(getJobTrackerHost(), env) + ":8030");
    } else {
      conf.set(HADOOP_JOB_TRACKER_HOST, jobTracker);
    }
  }
  System.err.println("Using " + (AbstractHadoopJobConfig.isHadoop2() ? "resource manager: " : "jobtracker: ")
      + jobTracker);

  if (AbstractHadoopJobConfig.isHadoop2()) {
    // a few other properties needed to run against Yarn
    conf.set("yarn.nodemanager.aux-services", "mapreduce_shuffle");
    conf.set("mapreduce.framework.name", "yarn");
  }

  if (!DistributedJobConfig.isEmpty(getMapredMaxSplitSize())) {
    conf.set(AbstractHadoopJobConfig.isHadoop2() ? HADOOP2_MAPRED_MAX_SPLIT_SIZE
        : HADOOP_MAPRED_MAX_SPLIT_SIZE, getMapredMaxSplitSize());
  }

  // Do any user supplied properties here before creating the Job
  for (Map.Entry<String, String> e : m_additionalUserSuppliedProperties.entrySet()) {
    conf.set(e.getKey(), e.getValue());
  }

  m_hdfsConfig.configureForHadoop(conf, env);

  Job job = new Job(conf, jobName);

  String numMappers = getNumberOfMaps();
  if (!DistributedJobConfig.isEmpty(numMappers)) {
    numMappers = environmentSubstitute(numMappers, env);
    ((JobConf) job.getConfiguration()).setNumMapTasks(Integer.parseInt(numMappers));
  }

  // The number of map tasks that will be run simultaneously by a task tracker
  String maxConcurrentMapTasks = getTaskTrackerMapTasksMaximum();
  if (!DistributedJobConfig.isEmpty(maxConcurrentMapTasks)) {
    ((JobConf) job.getConfiguration()).set("mapred.tasktracker.map.tasks.maximum", maxConcurrentMapTasks);
  }

  String numReducers = getNumberOfReducers();
  if (!DistributedJobConfig.isEmpty(numReducers)) {
    numReducers = environmentSubstitute(numReducers, env);
    job.setNumReduceTasks(Integer.parseInt(numReducers));

    if (Integer.parseInt(numReducers) == 0) {
      System.err.println("Warning - no reducer class set. Configuring for a map only job");
    }
  } else {
    job.setNumReduceTasks(1);
  }

  String mapperClass = getMapperClass();
  if (DistributedJobConfig.isEmpty(mapperClass)) {
    throw new IOException("No mapper class specified!");
  }
  mapperClass = environmentSubstitute(mapperClass, env);

  @SuppressWarnings("unchecked")
  Class<? extends Mapper> mc = (Class<? extends Mapper>) Class.forName(mapperClass);
  job.setMapperClass(mc);

  // use the Job's reduce-task count here rather than re-parsing numReducers,
  // which may be empty when the default of 1 was applied above
  String reducerClass = getReducerClass();
  if (DistributedJobConfig.isEmpty(reducerClass) && job.getNumReduceTasks() > 0) {
    throw new IOException("No reducer class specified!");
  } else if (job.getNumReduceTasks() > 0) {
    reducerClass = environmentSubstitute(reducerClass, env);

    @SuppressWarnings("unchecked")
    Class<? extends Reducer> rc = (Class<? extends Reducer>) Class.forName(reducerClass);
    job.setReducerClass(rc);
  }

  String combinerClass = getCombinerClass();
  if (!DistributedJobConfig.isEmpty(combinerClass)) {
    combinerClass = environmentSubstitute(combinerClass, env);

    @SuppressWarnings("unchecked")
    Class<? extends Reducer> cc = (Class<? extends Reducer>) Class.forName(combinerClass);
    job.setCombinerClass(cc);
  }

  String inputFormatClass = getInputFormatClass();
  if (DistributedJobConfig.isEmpty(inputFormatClass)) {
    throw new IOException("No input format class specified");
  }
  inputFormatClass = environmentSubstitute(inputFormatClass, env);

  @SuppressWarnings("unchecked")
  Class<? extends InputFormat> ifc = (Class<? extends InputFormat>) Class.forName(inputFormatClass);
  job.setInputFormatClass(ifc);

  String outputFormatClass = getOutputFormatClass();
  if (DistributedJobConfig.isEmpty(outputFormatClass)) {
    throw new IOException("No output format class specified");
  }
  outputFormatClass = environmentSubstitute(outputFormatClass, env);

  @SuppressWarnings("unchecked")
  Class<? extends OutputFormat> ofc = (Class<? extends OutputFormat>) Class.forName(outputFormatClass);
  job.setOutputFormatClass(ofc);

  String mapOutputKeyClass = getMapOutputKeyClass();
  if (DistributedJobConfig.isEmpty(mapOutputKeyClass)) {
    throw new IOException("No map output key class defined");
  }
  mapOutputKeyClass = environmentSubstitute(mapOutputKeyClass, env);
  Class mokc = Class.forName(mapOutputKeyClass);
  job.setMapOutputKeyClass(mokc);

  String mapOutputValueClass = getMapOutputValueClass();
  if (DistributedJobConfig.isEmpty(mapOutputValueClass)) {
    throw new IOException("No map output value class defined");
  }
  mapOutputValueClass = environmentSubstitute(mapOutputValueClass, env);
  Class movc = Class.forName(mapOutputValueClass);
  job.setMapOutputValueClass(movc);

  String outputKeyClass = getOutputKeyClass();
  if (DistributedJobConfig.isEmpty(outputKeyClass)) {
    throw new IOException("No output key class defined");
  }
  outputKeyClass = environmentSubstitute(outputKeyClass, env);
  Class okc = Class.forName(outputKeyClass);
  job.setOutputKeyClass(okc);

  String outputValueClass = getOutputValueClass();
  if (DistributedJobConfig.isEmpty(outputValueClass)) {
    throw new IOException("No output value class defined");
  }
  outputValueClass = environmentSubstitute(outputValueClass, env);
  Class ovc = Class.forName(outputValueClass);
  job.setOutputValueClass(ovc);

  String inputPaths = getInputPaths();
  // don't complain if there aren't any, as inputs such as HBASE
  // require other properties to be set
  if (!DistributedJobConfig.isEmpty(inputPaths)) {
    inputPaths = environmentSubstitute(inputPaths, env);
    FileInputFormat.setInputPaths(job, inputPaths);
  }

  String outputPath = getOutputPath();
  if (DistributedJobConfig.isEmpty(outputPath)) {
    throw new IOException("No output path specified");
  }
  outputPath = environmentSubstitute(outputPath, env);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  return job;
}
From source file:dz.lab.mapred.hbase.custom_output.StartsWithCountJob_HBase.java
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf(), "StartsWithCount-HBase");
  job.setJarByClass(getClass());

  Scan scan = new Scan();
  scan.addColumn(toBytes(FAMILY), toBytes(INPUT_COLUMN));

  // set up job with hbase utils
  TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, StartsWithCountMapper_HBase.class, Text.class,
      IntWritable.class, job);
  TableMapReduceUtil.initTableReducerJob(TABLE_NAME, StartsWithCountReducer_HBase.class, job);
  job.setNumReduceTasks(1);

  return job.waitForCompletion(true) ? 0 : 1;
}