Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the map output data. This allows the map output key class to differ from the final output key class.
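
Before the collected examples, a minimal self-contained sketch (not taken from any of the projects below) may help illustrate when this call is required: in the hypothetical word-count driver that follows, the map output value class (IntWritable) differs from the final output value class (LongWritable), so the intermediate shuffle types must be declared explicitly. If they are not set, Hadoop assumes the map output classes are the same as the job output classes.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MapOutputKeyExample {

    // Emits (word, 1) for every token in the input line.
    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Sums the counts; note the value type widens from IntWritable to LongWritable.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            context.write(key, new LongWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "map-output-key-example");
        job.setJarByClass(MapOutputKeyExample.class);

        job.setMapperClass(TokenMapper.class);
        job.setReducerClass(SumReducer.class);

        // The map output value class differs from the final output value
        // class, so the intermediate types must be declared explicitly;
        // without these calls Hadoop would assume the job output classes.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that in map-only jobs (setNumReduceTasks(0)) there is no shuffle, so the final output key/value classes are the ones the OutputFormat sees.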

Usage

From source file:com.antbrains.crf.hadoop.CalcFeatureWeights.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 3 && otherArgs.length != 4) {
        System.err.println("CalcFeatureWeights <inDir> <tmpDir> <outDir> [startStep]");
        System.exit(-1);
    }
    int startStep = 1;
    if (otherArgs.length == 4) {
        startStep = Integer.valueOf(otherArgs[otherArgs.length - 1]);
    }
    FileSystem fs = FileSystem.get(conf);
    if (startStep <= 1) {
        System.out.println("calc");
        fs.delete(new Path(otherArgs[1]), true);
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());
        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(CalcFeatureMapper.class);
        job.setReducerClass(CalcFeatureReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        // the intermediate (shuffle) types differ from the final output types set below
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(MyKey.class);

        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step1 failed");
            return;
        }
    }

    if (startStep <= 2) { // sort
        fs.delete(new Path(otherArgs[2]), true);
        System.out.println("sort");
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());

        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(MyKey.class);
        job.setMapOutputValueClass(MyValue.class);
        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);

        FileInputFormat.setInputPaths(job, new Path(otherArgs[1]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step2 failed");
            return;
        }
    }

}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java

License:Apache License

/**
 * Test for map-reduce job.
 * @throws Exception if failed
 */
@Test
public void map_reduce() throws Exception {
    Job job = newJob();
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setSortComparatorClass(Text.Comparator.class);
    job.setGroupingComparatorClass(Text.Comparator.class);

    job.setReducerClass(WordCountReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, new String[] { "a b c d", "a a b c", "c", });

    File outputDir = folder.newFolder();
    outputDir.delete();

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    assertThat(new SimpleJobRunner().run(job), is(true));
    assertThat(toMap(read(outputDir)), is(map(new String[] { "a", "3", "b", "2", "c", "3", "d", "1", })));
}

From source file:com.asakusafw.runtime.stage.AbstractStageClient.java

License:Apache License

@SuppressWarnings("rawtypes")
private void configureShuffle(Job job, VariableTable variables) {
    Class<? extends Reducer> reducer = getReducerClassOrNull();
    if (reducer != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Reducer: {0}", reducer.getName())); //$NON-NLS-1$
        }
        job.setReducerClass(reducer);
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Reducer: N/A"); //$NON-NLS-1$
        }
        job.setNumReduceTasks(0);
        return;
    }

    Class<? extends Writable> outputKeyClass = or(getShuffleKeyClassOrNull(), NullWritable.class);
    Class<? extends Writable> outputValueClass = or(getShuffleValueClassOrNull(), NullWritable.class);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Shuffle: key={0}, value={1}", //$NON-NLS-1$
                outputKeyClass.getName(), outputValueClass.getName()));
    }
    job.setMapOutputKeyClass(outputKeyClass);
    job.setMapOutputValueClass(outputValueClass);

    Class<? extends Reducer> combiner = getCombinerClassOrNull();
    if (combiner != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Combiner: {0}", combiner.getName())); //$NON-NLS-1$
        }
        job.setCombinerClass(combiner);
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Combiner: N/A"); //$NON-NLS-1$
        }
    }

    Class<? extends Partitioner> partitioner = getPartitionerClassOrNull();
    if (partitioner != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Partitioner: {0}", partitioner.getName())); //$NON-NLS-1$
        }
        job.setPartitionerClass(partitioner);
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Partitioner: DEFAULT"); //$NON-NLS-1$
        }
    }

    Class<? extends RawComparator> groupingComparator = getGroupingComparatorClassOrNull();
    if (groupingComparator != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("GroupingComparator: {0}", groupingComparator.getName())); //$NON-NLS-1$
        }
        job.setGroupingComparatorClass(groupingComparator);
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("GroupingComparator: DEFAULT"); //$NON-NLS-1$
        }
    }

    Class<? extends RawComparator> sortComparator = getSortComparatorClassOrNull();
    if (sortComparator != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("SortComparator: {0}", sortComparator.getName())); //$NON-NLS-1$
        }
        job.setSortComparatorClass(sortComparator);
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("SortComparator: DEFAULT"); //$NON-NLS-1$
        }
    }
}

From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java

License:Apache License

private void updateMerge() throws IOException, InterruptedException {
    Job job = newJob();

    List<StageInput> inputList = new ArrayList<>();
    inputList.add(new StageInput(storage.getHeadContents("*").toString(), TemporaryInputFormat.class,
            MergeJoinBaseMapper.class));
    inputList.add(new StageInput(storage.getPatchContents("*").toString(), TemporaryInputFormat.class,
            MergeJoinPatchMapper.class));
    StageInputDriver.set(job, inputList);
    job.setInputFormatClass(StageInputFormat.class);
    job.setMapperClass(StageInputMapper.class);
    // PatchApplyKey drives a secondary sort via the partitioner and comparators set below
    job.setMapOutputKeyClass(PatchApplyKey.class);
    job.setMapOutputValueClass(modelClass);

    // combiner may have no effect in normal cases
    job.setReducerClass(MergeJoinReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(modelClass);
    job.setPartitionerClass(PatchApplyKey.Partitioner.class);
    job.setSortComparatorClass(PatchApplyKey.SortComparator.class);
    job.setGroupingComparatorClass(PatchApplyKey.GroupComparator.class);

    TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
    job.setOutputFormatClass(TemporaryOutputFormat.class);
    job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class,
            org.apache.hadoop.mapred.OutputCommitter.class);

    LOG.info(MessageFormat.format("applying patch (merge join): {0} / {1} -> {2}",
            storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
    try {
        boolean succeed = job.waitForCompletion(true);
        LOG.info(MessageFormat.format("applied patch (merge join): succeed={0}, {1} / {2} -> {3}", succeed,
                storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        if (succeed == false) {
            throw new IOException(MessageFormat.format("failed to apply patch (merge join): {0} / {1} -> {2}",
                    storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        }
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
    putMeta();
}

From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java

License:Apache License

private void updateTable() throws IOException, InterruptedException {
    Job job = newJob();
    List<StageInput> inputList = new ArrayList<>();
    inputList.add(new StageInput(storage.getHeadContents("*").toString(), TemporaryInputFormat.class,
            TableJoinBaseMapper.class));
    inputList.add(new StageInput(storage.getPatchContents("*").toString(), TemporaryInputFormat.class,
            TableJoinPatchMapper.class));
    StageInputDriver.set(job, inputList);
    StageResourceDriver.add(job, storage.getPatchContents("*").toString(), TableJoinBaseMapper.RESOURCE_KEY);
    job.setInputFormatClass(StageInputFormat.class);
    job.setMapperClass(StageInputMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(modelClass);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(modelClass);

    TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
    job.setOutputFormatClass(TemporaryOutputFormat.class);
    job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class,
            org.apache.hadoop.mapred.OutputCommitter.class);

    job.setNumReduceTasks(0);

    LOG.info(MessageFormat.format("applying patch (table join): {0} / {1} -> {2}",
            storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
    try {
        boolean succeed = job.waitForCompletion(true);
        LOG.info(MessageFormat.format("applied patch (table join): succeed={0}, {1} / {2} -> {3}", succeed,
                storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        if (succeed == false) {
            throw new IOException(MessageFormat.format("failed to apply patch (table join): {0} / {1} -> {2}",
                    storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        }
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
    putMeta();
}

From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java

License:Apache License

private void create() throws InterruptedException, IOException {
    Job job = newJob();
    List<StageInput> inputList = new ArrayList<>();
    inputList.add(new StageInput(storage.getPatchContents("*").toString(), TemporaryInputFormat.class,
            CreateCacheMapper.class));
    StageInputDriver.set(job, inputList);
    job.setInputFormatClass(StageInputFormat.class);
    job.setMapperClass(StageInputMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(modelClass);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(modelClass);

    TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
    job.setOutputFormatClass(TemporaryOutputFormat.class);
    job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class,
            org.apache.hadoop.mapred.OutputCommitter.class);

    job.setNumReduceTasks(0);

    LOG.info(MessageFormat.format("applying patch (no join): {0} / (empty) -> {2}",
            storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
    try {
        boolean succeed = job.waitForCompletion(true);
        LOG.info(MessageFormat.format("applied patch (no join): succeed={0}, {1} / (empty) -> {3}", succeed,
                storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        if (succeed == false) {
            throw new IOException(MessageFormat.format("failed to apply patch (no join): {0} / (empty) -> {2}",
                    storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        }
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
    putMeta();
}

From source file:com.asp.tranlog.ImportTsv.java

License:Apache License

/**
 * Sets up the actual job.
 * 
 * @param conf
 *            The current configuration.
 * @param args
 *            The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *             When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {

    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
    if (actualSeparator != null) {
        conf.set(SEPARATOR_CONF_KEY, new String(Base64.encodeBytes(actualSeparator.getBytes())));
    }

    // See if a non-default Mapper was set
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    String tableName = args[0];
    Path inputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(mapperClass);
    FileInputFormat.setInputPaths(job, inputDir);

    String inputCodec = conf.get(INPUT_LZO_KEY);
    if (inputCodec == null) {
        FileInputFormat.setMaxInputSplitSize(job, 67108864L); // max split size = 64 MB
        job.setInputFormatClass(TextInputFormat.class);
    } else {
        if (inputCodec.equalsIgnoreCase("lzo"))
            job.setInputFormatClass(LzoTextInputFormat.class);
        else {
            usage("not supported compression codec!");
            System.exit(-1);
        }
    }

    job.setMapperClass(mapperClass);

    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        HTable table = new HTable(conf, tableName);
        job.setReducerClass(PutSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
        // No reducers. Just write straight to table.
        // Call initTableReducerJob to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    // Guava (com.google.common.base.Function) is used by TsvParser.
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Function.class);
    return job;
}

From source file:com.avira.couchdoop.demo.BenchmarkUpdater.java

License:Apache License

public Job configureJob(Configuration conf, String input) throws IOException {
    conf.setInt("mapreduce.map.failures.maxpercent", 5);
    conf.setInt("mapred.max.map.failures.percent", 5);
    conf.setInt("mapred.max.tracker.failures", 20);

    Job job = Job.getInstance(conf);
    job.setJarByClass(BenchmarkUpdater.class);

    // User classpath takes precedence in favor of Hadoop classpath.
    // This is because the Couchbase client requires a newer version of
    // org.apache.httpcomponents:httpcore.
    job.setUserClassesTakesPrecedence(true);

    // Input
    FileInputFormat.setInputPaths(job, input);

    // Mapper
    job.setMapperClass(BenchmarkUpdateMapper.class);
    job.setMapOutputKeyClass(String.class);
    job.setMapOutputValueClass(CouchbaseAction.class);

    // Reducer
    job.setNumReduceTasks(0);

    // Output
    job.setOutputFormatClass(CouchbaseOutputFormat.class);
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(CouchbaseAction.class);

    return job;
}

From source file:com.avira.couchdoop.jobs.CouchbaseExporter.java

License:Apache License

public Job configureJob(Configuration conf, String input) throws IOException {
    conf.setInt("mapreduce.map.failures.maxpercent", 5);
    conf.setInt("mapred.max.map.failures.percent", 5);
    conf.setInt("mapred.max.tracker.failures", 20);

    Job job = Job.getInstance(conf);
    job.setJarByClass(CouchbaseExporter.class);

    // Input
    FileInputFormat.setInputPaths(job, input);

    // Mapper
    job.setMapperClass(CsvToCouchbaseMapper.class);
    job.setMapOutputKeyClass(String.class);
    job.setMapOutputValueClass(CouchbaseAction.class);

    // Reducer
    job.setNumReduceTasks(0);

    // Output
    job.setOutputFormatClass(CouchbaseOutputFormat.class);
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(CouchbaseAction.class);

    return job;
}

From source file:com.avira.couchdoop.jobs.CouchbaseViewImporter.java

License:Apache License

public Job configureJob(Configuration conf, String output) throws IOException {
    conf.setInt("mapreduce.map.failures.maxpercent", 5);
    conf.setInt("mapred.max.map.failures.percent", 5);
    conf.setInt("mapred.max.tracker.failures", 20);

    Job job = Job.getInstance(conf);
    job.setJarByClass(CouchbaseViewImporter.class);

    // Input
    job.setInputFormatClass(CouchbaseViewInputFormat.class);

    // Mapper
    job.setMapperClass(CouchbaseViewToFileMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Reducer
    job.setNumReduceTasks(0);

    // Output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(output));

    return job;
}