Example usage for org.apache.hadoop.mapreduce Job getOutputValueClass

List of usage examples for org.apache.hadoop.mapreduce Job getOutputValueClass

Introduction

On this page you can find usage examples for the getOutputValueClass method of org.apache.hadoop.mapreduce Job.

Prototype

public Class<?> getOutputValueClass() 

Source Link

Document

Get the value class for job outputs.

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License:Apache License

/**
 * Sets the configurations used for outputs.
 *
 * <p>The behaviour depends on how many outputs were registered through the context:
 * <ul>
 *   <li>none: the job is left untouched, so whatever output format the user configured is used;</li>
 *   <li>exactly one: that output's configuration and output format class become the job's root output;</li>
 *   <li>more than one: every output is registered as a named output, and the root output format is
 *       replaced by one that does not support writing.</li>
 * </ul>
 *
 * @param job the job whose output configuration is updated
 * @throws IllegalArgumentException if a registered output provides {@code null} as its output format class name
 */
private void setOutputsIfNeeded(Job job) {
    Map<String, OutputFormatProvider> outputFormatProviders = context.getOutputFormatProviders();
    LOG.debug("Using as output for MapReduce Job: {}", outputFormatProviders.keySet());

    if (outputFormatProviders.isEmpty()) {
        // user is not going through our APIs to add output; leave the job's output format to user
        return;
    }

    if (outputFormatProviders.size() == 1) {
        // If only one output is configured through the context, then set it as the root OutputFormat
        Map.Entry<String, OutputFormatProvider> onlyEntry = outputFormatProviders.entrySet().iterator().next();
        OutputFormatProvider onlyProvider = onlyEntry.getValue();

        // Validate before mutating the job configuration, mirroring the check done for multiple outputs,
        // so a misconfigured output fails with a message that names the offending output.
        String rootOutputFormatClassName = onlyProvider.getOutputFormatClassName();
        if (rootOutputFormatClassName == null) {
            throw new IllegalArgumentException(
                    "Output '" + onlyEntry.getKey() + "' provided null as the output format");
        }

        ConfigurationUtil.setAll(onlyProvider.getOutputFormatConfiguration(), job.getConfiguration());
        job.getConfiguration().set(Job.OUTPUT_FORMAT_CLASS_ATTR, rootOutputFormatClassName);
        return;
    }

    // multiple output formats configured via the context. We should use a RecordWriter that doesn't support writing
    // as the root output format in this case to disallow writing directly on the context
    MultipleOutputsMainOutputWrapper.setRootOutputFormat(job, UnsupportedOutputFormat.class.getName(),
            new HashMap<String, String>());
    job.setOutputFormatClass(MultipleOutputsMainOutputWrapper.class);

    for (Map.Entry<String, OutputFormatProvider> entry : outputFormatProviders.entrySet()) {
        String outputName = entry.getKey();
        OutputFormatProvider outputFormatProvider = entry.getValue();

        String outputFormatClassName = outputFormatProvider.getOutputFormatClassName();
        if (outputFormatClassName == null) {
            throw new IllegalArgumentException(
                    "Output '" + outputName + "' provided null as the output format");
        }

        // Every named output reuses the job-level output key and value classes.
        Map<String, String> outputConfig = outputFormatProvider.getOutputFormatConfiguration();
        MultipleOutputs.addNamedOutput(job, outputName, outputFormatClassName, job.getOutputKeyClass(),
                job.getOutputValueClass(), outputConfig);
    }
}

From source file:com.baynote.kafka.hadoop.KafkaJobBuilderTest.java

License:Apache License

/**
 * Configures a job three times from the same builder (single queue with text output, two queues with
 * lazy sequence-file output, and S3 credentials) and checks the resulting job settings after each pass.
 */
@Test
public void testConfigureWholeJob() throws Exception {
    // ---- pass 1: single queue input, text file output, all optional settings supplied ----
    builder.setZkConnect("localhost:2181");
    builder.addQueueInput("queue_name", "group_name", MockMapper.class);
    builder.setTextFileOutputFormat("/a/hdfs/path");

    builder.setJobName("job_name");
    builder.setMapOutputKeyClass(Text.class);
    builder.setMapOutputValueClass(BytesWritable.class);
    builder.setReducerClass(MockReducer.class);
    builder.setTaskMemorySettings("-Xmx2048m");
    builder.setNumReduceTasks(100);
    builder.setParitioner(MockPartitioner.class);
    builder.setKafkaFetchSizeBytes(1024);

    final Job singleQueueJob = builder.configureJob(conf);

    assertEquals("job_name", singleQueueJob.getJobName());
    assertEquals(Text.class, singleQueueJob.getMapOutputKeyClass());
    assertEquals(BytesWritable.class, singleQueueJob.getMapOutputValueClass());
    assertEquals(MockReducer.class, singleQueueJob.getReducerClass());
    assertEquals(MockMapper.class, singleQueueJob.getMapperClass());
    assertEquals("-Xmx2048m", singleQueueJob.getConfiguration().get("mapred.child.java.opts"));
    assertEquals(100, singleQueueJob.getNumReduceTasks());
    assertEquals(MockPartitioner.class, singleQueueJob.getPartitionerClass());
    assertEquals(1024, KafkaInputFormat.getKafkaFetchSizeBytes(singleQueueJob.getConfiguration()));
    assertEquals(TextOutputFormat.class, singleQueueJob.getOutputFormatClass());
    assertEquals(KafkaInputFormat.class, singleQueueJob.getInputFormatClass());
    assertEquals("file:/a/hdfs/path", TextOutputFormat.getOutputPath(singleQueueJob).toString());

    // ---- pass 2: no job name, lazy sequence-file output, a second queue input ----
    builder.setJobName(null);
    builder.setSequenceFileOutputFormat();
    builder.setUseLazyOutput();
    builder.addQueueInput("queue_name_2", "group_name_2", MockMapper.class);

    final Job multiQueueJob = builder.configureJob(conf);

    assertEquals(LazyOutputFormat.class, multiQueueJob.getOutputFormatClass());
    assertEquals(MultipleKafkaInputFormat.class, multiQueueJob.getInputFormatClass());
    assertEquals(DelegatingMapper.class, multiQueueJob.getMapperClass());
    assertEquals(BytesWritable.class, multiQueueJob.getOutputKeyClass());
    assertEquals(BytesWritable.class, multiQueueJob.getOutputValueClass());
    assertNotNull(SequenceFileOutputFormat.getOutputPath(multiQueueJob));
    // a job name is still expected even though null was passed to the builder
    assertNotNull(multiQueueJob.getJobName());

    // ---- pass 3: S3 credentials are copied into both the s3n and s3 configuration keys ----
    builder.useS3("my_aws_key", "s3cr3t", "my-bucket");
    builder.setTextFileOutputFormat("/a/hdfs/path");

    final Job s3Job = builder.configureJob(conf);

    assertEquals("my_aws_key", s3Job.getConfiguration().get("fs.s3n.awsAccessKeyId"));
    assertEquals("s3cr3t", s3Job.getConfiguration().get("fs.s3n.awsSecretAccessKey"));
    assertEquals("my_aws_key", s3Job.getConfiguration().get("fs.s3.awsAccessKeyId"));
    assertEquals("s3cr3t", s3Job.getConfiguration().get("fs.s3.awsSecretAccessKey"));
}

From source file:com.cloudera.castagna.logparser.Utils.java

License:Apache License

/**
 * Logs, at debug level, a one-line summary of the job's pipeline (input format, mapper, map output
 * key/value classes, reducer and reduce task count, output key/value classes, output format),
 * followed by the first configured input path and the output path.
 *
 * @param job the job to describe
 * @param log the logger that receives the debug messages
 * @throws ClassNotFoundException if one of the classes configured on the job cannot be loaded
 */
public static void log(Job job, Logger log) throws ClassNotFoundException {
    log.debug("{} -> {} ({}, {}) -> {}#{} ({}, {}) -> {}",
            new Object[] { job.getInputFormatClass().getSimpleName(), job.getMapperClass().getSimpleName(),
                    job.getMapOutputKeyClass().getSimpleName(), job.getMapOutputValueClass().getSimpleName(),
                    job.getReducerClass().getSimpleName(), job.getNumReduceTasks(),
                    job.getOutputKeyClass().getSimpleName(), job.getOutputValueClass().getSimpleName(),
                    job.getOutputFormatClass().getSimpleName() });
    Path[] inputs = FileInputFormat.getInputPaths(job);
    Path output = FileOutputFormat.getOutputPath(job);
    // A job without any configured input path yields an empty array; indexing it unconditionally
    // would turn a purely diagnostic call into an ArrayIndexOutOfBoundsException.
    log.debug("input: {}", inputs.length > 0 ? inputs[0] : null);
    log.debug("output: {}", output);
}

From source file:com.conversantmedia.mapreduce.tool.annotation.handler.NamedOutputAnnotationHandler.java

License:Apache License

/**
 * Registers the named outputs described by a {@code NamedOutput} annotation on the job.
 *
 * <p>A key or value type declared as {@code void.class} on the annotation is resolved from the
 * generic type parameters of the annotated target when those are available, and from the job's
 * output key/value class otherwise. Names already present in {@code configured} are skipped.
 *
 * @param annotation the annotation being handled; cast to {@code NamedOutput}
 * @param job        the job on which the named outputs are registered
 * @param target     the annotated object, inspected for generic type parameters
 * @throws ToolException declared by the handler contract
 */
@Override
public void process(Annotation annotation, Job job, Object target) throws ToolException {
    NamedOutput namedOutput = (NamedOutput) annotation;
    KeyValue declaredTypes = namedOutput.type();

    // If the annotated member is a MultipleOutputs, its type parameters (when present)
    // can supply the key/value classes.
    Pair<Type, Type> genericTypes = getGenericTypeParams(target);

    Class<?> keyClass = declaredTypes.key();
    if (keyClass == void.class) {
        // generic parameter first, job output key class as the fallback
        keyClass = (genericTypes != null) ? (Class<?>) genericTypes.getKey() : job.getOutputKeyClass();
    }

    Class<?> valueClass = declaredTypes.value();
    if (valueClass == void.class) {
        // generic parameter first, job output value class as the fallback
        valueClass = (genericTypes != null) ? (Class<?>) genericTypes.getValue() : job.getOutputValueClass();
    }

    for (String rawName : getNames(namedOutput)) {
        String outputName = (String) evaluateExpression(rawName);
        if (configured.contains(outputName)) {
            // already registered on an earlier pass
            continue;
        }
        MultipleOutputs.addNamedOutput(job, outputName, namedOutput.format(), keyClass, valueClass);
        MultipleOutputs.setCountersEnabled(job, namedOutput.countersEnabled());
        configured.add(outputName);
    }
}

From source file:com.example.Driver.java

License:Open Source License

/**
 * Configures and runs the word-count job, logging each setting as it is applied.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output directory
 * @return 0 if the job completed successfully, 1 if too few arguments were given or the job failed
 * @throws Exception if configuring or running the job fails
 */
public int run(String[] args) throws Exception {

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");

    job.setJarByClass(Driver.class);

    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("at least 2 parameters are required to run this jar: \"" + job.getJar()
                + " input_files output_directory\"");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    // The map output key/value classes are not set explicitly (uncomment to do so):
    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    // NOTE(review): the job output classes are only set further down, so the two values logged
    // here are whatever the job reports before that point - confirm this is the intended output.
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());
    // When you are not running any reducer, use instead:
    //      job.setNumReduceTasks(0);
    //      logger.info("number of reduce task is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());

    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Propagate the job outcome: a failed job must not be reported as exit code 0.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.moz.fiji.mapreduce.TestFijiGatherJobBuilder.java

License:Apache License

/**
 * Builds a gather job that writes directly to HFiles (no explicit reducer) and checks the
 * settings of the resulting Hadoop job.
 */
@Test
public void testGatherToHFile() throws Exception {
    final FijiMapReduceJob fijiJob = FijiGatherJobBuilder.create()
            .withConf(getConf())
            .withInputTable(mTable.getURI())
            .withGatherer(GatherToHFile.class)
            .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(
                    mTable.getURI(), getLocalTestPath("hfile"), 10))
            .build();

    final Job hadoopJob = fijiJob.getHadoopJob();

    // the gatherer class name is recorded in the job configuration
    assertEquals(GatherToHFile.class.getName(),
            hadoopJob.getConfiguration().get(FijiConfKeys.FIJI_GATHERER_CLASS));

    // no combiner, identity reducer, one reduce task per requested split
    assertEquals(null, hadoopJob.getCombinerClass());
    assertEquals(IdentityReducer.class, hadoopJob.getReducerClass());
    assertEquals(10, hadoopJob.getNumReduceTasks());

    // output side of the job
    assertEquals(FijiHFileOutputFormat.class, hadoopJob.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, hadoopJob.getOutputKeyClass());
    assertEquals(NullWritable.class, hadoopJob.getOutputValueClass());
}

From source file:com.moz.fiji.mapreduce.TestFijiGatherJobBuilder.java

License:Apache License

/**
 * Builds a gather job with an explicit reducer that targets HFile output and checks the
 * settings of the resulting Hadoop job.
 */
@Test
public void testGatherReducerToHFile() throws Exception {
    final FijiMapReduceJob fijiJob = FijiGatherJobBuilder.create()
            .withConf(getConf())
            .withInputTable(mTable.getURI())
            .withGatherer(SimpleGatherer.class)
            .withReducer(ReducerToHFile.class)
            .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(
                    mTable.getURI(), getLocalTestPath("hfile"), 10))
            .build();

    final Job hadoopJob = fijiJob.getHadoopJob();

    // the gatherer class name is recorded in the job configuration
    assertEquals(SimpleGatherer.class.getName(),
            hadoopJob.getConfiguration().get(FijiConfKeys.FIJI_GATHERER_CLASS));

    // no combiner, the supplied reducer, one reduce task per requested split
    assertEquals(null, hadoopJob.getCombinerClass());
    assertEquals(ReducerToHFile.class, hadoopJob.getReducerClass());
    assertEquals(10, hadoopJob.getNumReduceTasks());

    // output side of the job
    assertEquals(SequenceFileOutputFormat.class, hadoopJob.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, hadoopJob.getOutputKeyClass());
    assertEquals(NullWritable.class, hadoopJob.getOutputValueClass());
}

From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java

License:Apache License

/**
 * Adds the dependency jars, plus the jars of every class configured on the job, to the job
 * configuration so that they are shipped to the cluster and added to the DistributedCache.
 *
 * @param job the job whose configuration receives the jars
 * @throws IOException if a class configured on the job cannot be found (the
 *                     {@link ClassNotFoundException} is kept as the cause)
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        final Class<?>[] requiredClasses = {
                // fixed library dependencies
                org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class,
                com.google.common.collect.ImmutableSet.class,
                // classes configured on the job itself
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass(),
        };
        addDependencyJars(job.getConfiguration(), requiredClasses);
    } catch (ClassNotFoundException notFound) {
        throw new IOException(notFound);
    }
}

From source file:com.yahoo.ycsb.bulk.hbase.BulkDataGeneratorJob.java

License:Apache License

/**
 * Parameters for bulk loader specified through the config file:
 *
 * - prefix for the row keys/* www .j a va 2s  . c o  m*/
 * - range start
 * - range end (inclusive)
 * - num splits (or number of partitions).
 * - user
 * - password
 * - table
 *
 * For the accepted default options
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 */
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    Util.printArgs("run", args, System.err);
    printKeyValues(conf, ARG_KEYS, System.err);

    if (args.length > 1 || (args.length == 1 && "-help".compareToIgnoreCase(args[0]) == 0)) {
        System.err.println("Usage: " + this.getClass().getName()
                + "input_path [generic options] [input_paths...] ouptut_path");
        GenericOptionsParser.printGenericCommandUsage(System.err);
        return 1;
    }

    // Time run
    long startTime = System.currentTimeMillis();
    String workdir;

    if (args.length == 1) {
        /* override workdir in the config if it is specified in the
         * command line
         */
        conf.set(ARG_KEY_OUTDIR, args[0]);
        workdir = args[0];
    }

    workdir = conf.get(ARG_KEY_OUTDIR);

    if (workdir == null) {
        System.err.println("No output directory specified");
        return 1;
    }

    /* Initialize job, check parameters and decide which mapper to use */
    Job job = new Job(conf, conf.get(ARG_KEY_JOBNAME, "YCSB KV data generator"));

    /* these settings are the same (i.e., fixed) independent of the
     * parameters */
    job.setJarByClass(this.getClass());
    // job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);

    /* these settings should depend on the type of output file */
    job.setOutputFormatClass(HFileOutputFormat.class);
    /* not sure the next two are needed */
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    this.createInputFile(job, workdir);

    HFileOutputFormat.setOutputPath(job, new Path(workdir + "/files"));

    /* depending on whether the keys need to be sorted and hashed, then
     * decide which mapper and reducer to use 
     */
    boolean hashKeys = conf.getBoolean(ARG_KEY_HASH_KEYS, false);
    boolean sortKeys = conf.getBoolean(ARG_KEY_SORTKEYS, true);

    /* get splits file name: side-effect -> this may generate a splits file  */
    String splitsfile = this.getSplitsFile(job, workdir);

    if (sortKeys && hashKeys) { /* do a full map reduce job */
        job.setMapperClass(RowGeneratorMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setPartitionerClass(RangePartitioner.class);

        if (splitsfile == null) {
            /* Auto generate the splits file either from:
             * - the input key ranges
             * - from the current table splits
             */
            throw new InvalidInputException("No splits specified");
        }

        /* Set splits file */
        RangePartitioner.setSplitFile(job, splitsfile);

        /* Add reducer (based on mapper code) */
        job.setReducerClass(RowGeneratorReduce.class);

        /* the number of reducers is dependent on the number of
         * partitions
         */
        int numReduce = conf.getInt(ARG_KEY_NUMREDUCE, 1);
        job.setNumReduceTasks(numReduce);
    } else { /* perform a map only job */
        job.setMapperClass(RowGeneratorMapOnly.class);
        /* map output key and value types are the same as
         * for the job
         */
        job.setMapOutputKeyClass(job.getOutputKeyClass());
        job.setMapOutputValueClass(job.getOutputValueClass());
        job.setNumReduceTasks(0);
    }

    job.waitForCompletion(true);

    //        JobClient.runJob(conf);
    SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss.SSS z");
    SimpleDateFormat ddf = new SimpleDateFormat("HH:mm:ss.SSS");
    ddf.setTimeZone(TimeZone.getTimeZone("UTC"));
    long endTime = System.currentTimeMillis();
    System.out.println("Start time (ms): " + df.format(new Date(startTime)) + " -- " + startTime);
    System.out.println("End time (ms): " + df.format(new Date(endTime)) + " -- " + endTime);
    System.out
            .println("Elapsed time (ms): " + ddf.format(endTime - startTime) + " -- " + (endTime - startTime));
    return 0;
}

From source file:gr.ntua.h2rdf.inputFormat2.TableMapReduceUtil.java

License:Open Source License

/**
 * Adds the HBase dependency jars, plus the jars of every class configured on the job, to the
 * job configuration so that they are shipped to the cluster and added to the DistributedCache.
 *
 * @param job the job whose configuration receives the jars
 * @throws IOException if a class configured on the job cannot be found (the
 *                     {@link ClassNotFoundException} is kept as the cause)
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        final Class<?>[] requiredClasses = {
                // fixed library dependencies
                org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class,
                // classes configured on the job itself
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass(),
        };
        addDependencyJars(job.getConfiguration(), requiredClasses);
    } catch (ClassNotFoundException notFound) {
        throw new IOException(notFound);
    }
}