Example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce Job setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the job output data.
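
In practice, setOutputKeyClass is called while configuring a job driver, before submission; the method throws IllegalStateException if the job has already been submitted. Below is a minimal, self-contained sketch (not one of the collected examples; the class name IdentityDriver is hypothetical) of a map-only job that passes input lines through unchanged. Note that unless setMapOutputKeyClass is called separately, the class passed here is also used as the map output key class.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class IdentityDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "identity pass-through");
        job.setJarByClass(IdentityDriver.class);
        job.setMapperClass(Mapper.class); // base Mapper is the identity: emits (offset, line)
        job.setNumReduceTasks(0); // map-only: mapper output is written directly
        // With TextInputFormat (the default), the identity mapper emits
        // LongWritable keys and Text values, so declare exactly those types.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}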

Usage

From source file:com.antbrains.crf.hadoop.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.app.hadoopexample.MaxTemperatureDriver.java

public int run(String[] arg) throws Exception {
    // Note: the command-line arguments are ignored; input and output paths are hardcoded.
    String[] args = { "C:/Hadoop/input/LICENSE.txt", "C:/Hadoop/output/LICENSE.txt" };
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperatureDriver <input path> <output path>");
        System.exit(-1);
    }

    Job job = new Job();
    job.setJarByClass(MaxTemperatureDriver.class);
    job.setJobName("Max Temperature");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:com.architecting.ch07.MapReduceIndexerTool.java

License:Apache License

/** API for Java clients; visible for testing; may become a public API eventually. */
int run(Options options) throws Exception {
    if (getConf().getBoolean("isMR1", false) && "local".equals(getConf().get("mapred.job.tracker"))) {
        throw new IllegalStateException(
                "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported "
                        + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, "
                        + "which is required for passing files via --files and --libjars");
    }

    long programStartTime = System.nanoTime();
    getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (options.log4jConfigFile != null) {
        Utils.setLogConfigFile(options.log4jConfigFile, getConf());
        addDistributedCacheFile(options.log4jConfigFile, getConf());
    }

    Configuration config = HBaseConfiguration.create();
    Job job = Job.getInstance(config);
    job.setJarByClass(getClass());

    // To be able to run this example from eclipse, we need to make sure 
    // the built jar is distributed to the map-reduce tasks from the
    // local file system.
    job.addCacheArchive(new URI("file:///home/cloudera/ahae/target/ahae.jar"));

    FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration());
    if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) {
        return -1;
    }
    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");

    int reducers = 1;

    Scan scan = new Scan();
    scan.addFamily(CF);
    // tag::SETUP[]
    scan.setCaching(500); // <1>
    scan.setCacheBlocks(false); // <2>

    TableMapReduceUtil.initTableMapperJob( // <3>
            options.inputTable, // Input HBase table name
            scan, // Scan instance to control what to index
            HBaseAvroToSOLRMapper.class, // Mapper to parse cells content.
            Text.class, // Mapper output key
            SolrInputDocumentWritable.class, // Mapper output value
            job);

    FileOutputFormat.setOutputPath(job, outputReduceDir);

    job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(HBaseAvroToSOLRMapper.class));
    job.setReducerClass(SolrReducer.class); // <4>
    job.setPartitionerClass(SolrCloudPartitioner.class); // <5>
    job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
    job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
    job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards);

    job.setOutputFormatClass(SolrOutputFormat.class);
    SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SolrInputDocumentWritable.class);
    job.setSpeculativeExecution(false);
    // end::SETUP[]
    job.setNumReduceTasks(reducers); // Set the number of reducers based on the number of shards we have.
    if (!waitForCompletion(job, true)) {
        return -1;// job failed
    }

    // -------------------------------------------------------------------------------------------------------------------------------------

    assert reducers == options.shards;

    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;
            }
        }
    }

    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;
    }

    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;
    }

    goodbye(job, programStartTime);
    return 0;
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java

License:Apache License

/**
 * Test for map only job.
 * @throws Exception if failed
 */
@Test
public void map_only() throws Exception {
    Job job = newJob();
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(SimpleMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, "Hello, world!");

    File outputDir = folder.newFolder();
    outputDir.delete();

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    assertThat(new SimpleJobRunner().run(job), is(true));
    assertThat(trimHead(read(outputDir)), is(set("Hello, world!")));
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java

License:Apache License

/**
 * Test for map-reduce job.
 * @throws Exception if failed
 */
@Test
public void map_reduce() throws Exception {
    Job job = newJob();
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setSortComparatorClass(Text.Comparator.class);
    job.setGroupingComparatorClass(Text.Comparator.class);

    job.setReducerClass(WordCountReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, new String[] { "a b c d", "a a b c", "c", });

    File outputDir = folder.newFolder();
    outputDir.delete();

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    assertThat(new SimpleJobRunner().run(job), is(true));
    assertThat(toMap(read(outputDir)), is(map(new String[] { "a", "3", "b", "2", "c", "3", "d", "1", })));
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java

License:Apache License

/**
 * Test for wrong job.
 * @throws Exception if failed
 */
@Test
public void exception() throws Exception {
    Job job = newJob();
    job.setJobName("w/ exception");
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(InvalidMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, "testing");

    File outputDir = folder.newFolder();
    outputDir.delete();

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    assertThat(new SimpleJobRunner().run(job), is(false));
}

From source file:com.asakusafw.runtime.stage.AbstractStageClient.java

License:Apache License

private void configureStageOutput(Job job, VariableTable variables) throws IOException {
    String outputPath = variables.parse(getStageOutputPath());
    List<StageOutput> outputList = new ArrayList<>();
    for (StageOutput output : getStageOutputs()) {
        String name = output.getName();
        Class<?> keyClass = output.getKeyClass();
        Class<?> valueClass = output.getValueClass();
        Class<? extends OutputFormat<?, ?>> formatClass = output.getFormatClass();
        Map<String, String> attributes = output.getAttributes();
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "Output: path={0}/{1}-*, format={2}, key={3}, value={4}, attributes={5}", //$NON-NLS-1$
                    outputPath, name, formatClass.getName(), keyClass.getName(), valueClass.getName(),
                    attributes));
        }
        outputList.add(new StageOutput(name, keyClass, valueClass, formatClass, attributes));
    }
    StageOutputDriver.set(job, outputPath, outputList);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(StageOutputFormat.class);
    job.getConfiguration().setClass("mapred.output.committer.class", //$NON-NLS-1$
            LegacyBridgeOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class);
}

From source file:com.asakusafw.runtime.stage.output.StageOutputDriver.java

License:Apache License

private ResultOutput<?> buildNormalSink(String name,
        @SuppressWarnings("rawtypes") Class<? extends OutputFormat> formatClass, Class<?> keyClass,
        Class<?> valueClass, List<Counter> counters) throws IOException, InterruptedException {
    assert context != null;
    assert name != null;
    assert formatClass != null;
    assert keyClass != null;
    assert valueClass != null;
    assert counters != null;
    Job job = JobCompatibility.newJob(context.getConfiguration());
    job.setOutputFormatClass(formatClass);
    job.setOutputKeyClass(keyClass);
    job.setOutputValueClass(valueClass);
    TaskAttemptContext localContext = JobCompatibility.newTaskAttemptContext(job.getConfiguration(),
            context.getTaskAttemptID());
    if (FileOutputFormat.class.isAssignableFrom(formatClass)) {
        setOutputFilePrefix(localContext, name);
    }
    OutputFormat<?, ?> format = ReflectionUtils.newInstance(formatClass, localContext.getConfiguration());
    RecordWriter<?, ?> writer = format.getRecordWriter(localContext);
    return new ResultOutput<Writable>(localContext, writer);
}

From source file:com.asakusafw.testdriver.file.FileDeployer.java

License:Apache License

/**
 * Opens output for the specified {@link OutputFormat}.
 * @param <V> value type
 * @param definition target model definition
 * @param destination output location
 * @param output the output format to use
 * @return the opened {@link ModelOutput}
 * @throws IOException if failed to open the target output
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> ModelOutput<V> openOutput(DataModelDefinition<V> definition, final String destination,
        FileOutputFormat<? super NullWritable, ? super V> output) throws IOException {
    assert destination != null;
    assert output != null;
    LOG.debug("Opening {} using {}", destination, output.getClass().getName());
    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(definition.getModelClass());
    final File temporaryDir = File.createTempFile("asakusa", ".tempdir");
    if (temporaryDir.delete() == false || temporaryDir.mkdirs() == false) {
        throw new IOException("Failed to create temporary directory");
    }
    LOG.debug("Using staging deploy target: {}", temporaryDir);
    URI uri = temporaryDir.toURI();
    FileOutputFormat.setOutputPath(job, new Path(uri));
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    FileOutputFormatDriver<V> result = new FileOutputFormatDriver<V>(context, output, NullWritable.get()) {
        @Override
        public void close() throws IOException {
            super.close();
            deploy(destination, temporaryDir);
        }
    };
    return result;
}

From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java

License:Apache License

private void updateMerge() throws IOException, InterruptedException {
    Job job = newJob();

    List<StageInput> inputList = new ArrayList<>();
    inputList.add(new StageInput(storage.getHeadContents("*").toString(), TemporaryInputFormat.class,
            MergeJoinBaseMapper.class));
    inputList.add(new StageInput(storage.getPatchContents("*").toString(), TemporaryInputFormat.class,
            MergeJoinPatchMapper.class));
    StageInputDriver.set(job, inputList);
    job.setInputFormatClass(StageInputFormat.class);
    job.setMapperClass(StageInputMapper.class);
    job.setMapOutputKeyClass(PatchApplyKey.class);
    job.setMapOutputValueClass(modelClass);

    // combiner may have no effect in normal cases
    job.setReducerClass(MergeJoinReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(modelClass);
    job.setPartitionerClass(PatchApplyKey.Partitioner.class);
    job.setSortComparatorClass(PatchApplyKey.SortComparator.class);
    job.setGroupingComparatorClass(PatchApplyKey.GroupComparator.class);

    TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
    job.setOutputFormatClass(TemporaryOutputFormat.class);
    job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class,
            org.apache.hadoop.mapred.OutputCommitter.class);

    LOG.info(MessageFormat.format("applying patch (merge join): {0} / {1} -> {2}",
            storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
    try {
        boolean succeed = job.waitForCompletion(true);
        LOG.info(MessageFormat.format("applied patch (merge join): succeed={0}, {1} / {2} -> {3}", succeed,
                storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        if (succeed == false) {
            throw new IOException(MessageFormat.format("failed to apply patch (merge join): {0} / {1} -> {2}",
                    storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        }
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
    putMeta();
}