Example usage for org.apache.hadoop.mapreduce Job getOutputValueClass

Introduction

This page collects example usages of the getOutputValueClass() method of org.apache.hadoop.mapreduce.Job.

Prototype

public Class<?> getOutputValueClass()

Source Link

Document

Get the value class for job outputs.

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License:Apache License

/**
 * Applies the outputs registered on the context to the given job.
 * <p>
 * With no registered output the job is left untouched; with exactly one, that output becomes the
 * job's root output format; with several, each one is added as a named output.
 */
private void setOutputsIfNeeded(Job job) {
    Map<String, OutputFormatProvider> providers = context.getOutputFormatProviders();
    LOG.debug("Using as output for MapReduce Job: {}", providers.keySet());

    // Nothing was registered through our APIs: the output format is the user's responsibility.
    if (providers.isEmpty()) {
        return;
    }

    // A single registered output is installed directly as the root OutputFormat.
    if (providers.size() == 1) {
        OutputFormatProvider soleProvider = providers.values().iterator().next();
        ConfigurationUtil.setAll(soleProvider.getOutputFormatConfiguration(), job.getConfiguration());
        job.getConfiguration().set(Job.OUTPUT_FORMAT_CLASS_ATTR, soleProvider.getOutputFormatClassName());
        return;
    }

    // Several outputs were registered. The root output format is a RecordWriter that doesn't
    // support writing, so that nothing can be written directly on the context.
    MultipleOutputsMainOutputWrapper.setRootOutputFormat(
            job, UnsupportedOutputFormat.class.getName(), new HashMap<String, String>());
    job.setOutputFormatClass(MultipleOutputsMainOutputWrapper.class);

    for (Map.Entry<String, OutputFormatProvider> namedProvider : providers.entrySet()) {
        String name = namedProvider.getKey();
        OutputFormatProvider provider = namedProvider.getValue();

        String formatClassName = provider.getOutputFormatClassName();
        if (formatClassName == null) {
            throw new IllegalArgumentException("Output '" + name + "' provided null as the output format");
        }

        Map<String, String> formatConfig = provider.getOutputFormatConfiguration();
        MultipleOutputs.addNamedOutput(
                job, name, formatClassName, job.getOutputKeyClass(), job.getOutputValueClass(), formatConfig);
    }
}

From source file:com.baynote.kafka.hadoop.KafkaJobBuilderTest.java

License:Apache License

/**
 * Checks that settings applied to the builder show up on the {@code Job} returned by
 * {@code configureJob}, across three successive configurations of the same builder:
 * a fully specified job, a reconfigured job with a second queue input, and a job using S3.
 */
@Test
public void testConfigureWholeJob() throws Exception {
    // base configuration
    builder.setZkConnect("localhost:2181");
    builder.addQueueInput("queue_name", "group_name", MockMapper.class);
    builder.setTextFileOutputFormat("/a/hdfs/path");

    // extended configuration
    builder.setJobName("job_name");
    builder.setMapOutputKeyClass(Text.class);
    builder.setMapOutputValueClass(BytesWritable.class);
    builder.setReducerClass(MockReducer.class);
    builder.setTaskMemorySettings("-Xmx2048m");
    builder.setNumReduceTasks(100);
    builder.setParitioner(MockPartitioner.class);
    builder.setKafkaFetchSizeBytes(1024);

    Job job = builder.configureJob(conf);

    // every value set above must be readable back from the configured job
    assertEquals("job_name", job.getJobName());
    assertEquals(Text.class, job.getMapOutputKeyClass());
    assertEquals(BytesWritable.class, job.getMapOutputValueClass());
    assertEquals(MockReducer.class, job.getReducerClass());
    assertEquals(MockMapper.class, job.getMapperClass());
    assertEquals("-Xmx2048m", job.getConfiguration().get("mapred.child.java.opts"));
    assertEquals(100, job.getNumReduceTasks());
    assertEquals(MockPartitioner.class, job.getPartitionerClass());
    assertEquals(1024, KafkaInputFormat.getKafkaFetchSizeBytes(job.getConfiguration()));
    assertEquals(TextOutputFormat.class, job.getOutputFormatClass());
    assertEquals(KafkaInputFormat.class, job.getInputFormatClass());
    assertEquals("file:/a/hdfs/path", TextOutputFormat.getOutputPath(job).toString());

    // reconfigure the same builder: no explicit job name, sequence-file + lazy output,
    // and a second queue input
    builder.setJobName(null);
    builder.setSequenceFileOutputFormat();
    builder.setUseLazyOutput();
    builder.addQueueInput("queue_name_2", "group_name_2", MockMapper.class);

    job = builder.configureJob(conf);
    assertEquals(LazyOutputFormat.class, job.getOutputFormatClass());
    assertEquals(MultipleKafkaInputFormat.class, job.getInputFormatClass());
    assertEquals(DelegatingMapper.class, job.getMapperClass());
    assertEquals(BytesWritable.class, job.getOutputKeyClass());
    assertEquals(BytesWritable.class, job.getOutputValueClass());
    // no output path or job name was given for this run, yet both are expected to be non-null
    assertNotNull(SequenceFileOutputFormat.getOutputPath(job));
    assertNotNull(job.getJobName());

    // use s3
    builder.useS3("my_aws_key", "s3cr3t", "my-bucket");
    builder.setTextFileOutputFormat("/a/hdfs/path");
    job = builder.configureJob(conf);

    // the credentials are expected under both the s3n and the s3 configuration keys
    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3n.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3n.awsSecretAccessKey"));
    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3.awsSecretAccessKey"));
}

From source file:com.cloudera.castagna.logparser.Utils.java

License:Apache License

/**
 * Writes a debug-level description of a job: one line summarising its pipeline
 * (input format, mapper and its output types, reducer and reducer count, job output
 * types, output format), then one line per input path, then the output path.
 *
 * @param job the job to describe
 * @param log the logger that receives the debug messages
 * @throws ClassNotFoundException declared because the job's class getters may fail to
 *         resolve a configured class
 */
public static void log(Job job, Logger log) throws ClassNotFoundException {
    log.debug("{} -> {} ({}, {}) -> {}#{} ({}, {}) -> {}",
            new Object[] { job.getInputFormatClass().getSimpleName(), job.getMapperClass().getSimpleName(),
                    job.getMapOutputKeyClass().getSimpleName(), job.getMapOutputValueClass().getSimpleName(),
                    job.getReducerClass().getSimpleName(), job.getNumReduceTasks(),
                    job.getOutputKeyClass().getSimpleName(), job.getOutputValueClass().getSimpleName(),
                    job.getOutputFormatClass().getSimpleName() });

    // Iterate instead of reading inputs[0]: indexing an empty array would throw
    // ArrayIndexOutOfBoundsException, and paths after the first would go unreported.
    for (Path input : FileInputFormat.getInputPaths(job)) {
        log.debug("input: {}", input);
    }
    log.debug("output: {}", FileOutputFormat.getOutputPath(job));
}

From source file:com.conversantmedia.mapreduce.tool.annotation.handler.NamedOutputAnnotationHandler.java

License:Apache License

/**
 * Registers the named outputs described by a {@code NamedOutput} annotation on the job.
 * <p>
 * The key and value classes come from the annotation itself; when the annotation leaves one as
 * {@code void.class}, the generic type parameters of the annotated target are used if available,
 * and otherwise the job's own output key/value class. Each name is registered at most once.
 */
@Override
public void process(Annotation annotation, Job job, Object target) throws ToolException {
    NamedOutput namedOutput = (NamedOutput) annotation;
    KeyValue declaredTypes = namedOutput.type();

    // When the annotated member is a MultipleOutputs, its type parameters (if any)
    // can supply the key/value classes.
    Pair<Type, Type> genericTypes = getGenericTypeParams(target);

    Class<?> keyClass = declaredTypes.key();
    if (keyClass == void.class) {
        // prefer the generic parameter, otherwise fall back on the job output key class
        keyClass = (genericTypes != null) ? (Class<?>) genericTypes.getKey() : job.getOutputKeyClass();
    }

    Class<?> valueClass = declaredTypes.value();
    if (valueClass == void.class) {
        // prefer the generic parameter, otherwise fall back on the job output value class
        valueClass = (genericTypes != null) ? (Class<?>) genericTypes.getValue() : job.getOutputValueClass();
    }

    for (String rawName : getNames(namedOutput)) {
        String outputName = (String) evaluateExpression(rawName);
        if (configured.contains(outputName)) {
            // already registered by an earlier annotation
            continue;
        }
        MultipleOutputs.addNamedOutput(job, outputName, namedOutput.format(), keyClass, valueClass);
        MultipleOutputs.setCountersEnabled(job, namedOutput.countersEnabled());
        configured.add(outputName);
    }
}

From source file:com.example.Driver.java

License:Open Source License

/**
 * Configures the word-count job, logs each setting as it is applied, runs the job and
 * waits for it to finish.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output directory
 * @return 0 when the job completed successfully; 1 when fewer than two arguments were
 *         given or the job failed
 * @throws Exception if configuring or running the job fails
 */
public int run(String[] args) throws Exception {

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");

    job.setJarByClass(Driver.class);

    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("at least 2 parameters are required to run this jar: \"" + job.getJar()
                + " input_files output_directory\"");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());
    //When you are not running any Reducer
    //OR    job.setNumReduceTasks(0);
    //      logger.info("number of reduce task is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());

    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Propagate the job outcome: a failed job must not be reported as exit code 0.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.moz.fiji.mapreduce.TestFijiGatherJobBuilder.java

License:Apache License

/**
 * Builds a gather job that writes HFiles straight from the gatherer and checks the
 * resulting Hadoop job configuration.
 */
@Test
public void testGatherToHFile() throws Exception {
    final FijiMapReduceJob mrJob = FijiGatherJobBuilder.create()
            .withConf(getConf())
            .withInputTable(mTable.getURI())
            .withGatherer(GatherToHFile.class)
            .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(
                    mTable.getURI(), getLocalTestPath("hfile"), 10))
            .build();

    final Job hadoopJob = mrJob.getHadoopJob();
    final Configuration jobConf = hadoopJob.getConfiguration();

    // gatherer is recorded in the configuration; no combiner, identity reducer
    assertEquals(GatherToHFile.class.getName(), jobConf.get(FijiConfKeys.FIJI_GATHERER_CLASS));
    assertEquals(null, hadoopJob.getCombinerClass());
    assertEquals(IdentityReducer.class, hadoopJob.getReducerClass());
    assertEquals(10, hadoopJob.getNumReduceTasks());

    // output side of the job
    assertEquals(FijiHFileOutputFormat.class, hadoopJob.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, hadoopJob.getOutputKeyClass());
    assertEquals(NullWritable.class, hadoopJob.getOutputValueClass());
}

From source file:com.moz.fiji.mapreduce.TestFijiGatherJobBuilder.java

License:Apache License

/**
 * Builds a gather job whose reducer produces the HFile records and checks the
 * resulting Hadoop job configuration.
 */
@Test
public void testGatherReducerToHFile() throws Exception {
    final FijiMapReduceJob mrJob = FijiGatherJobBuilder.create()
            .withConf(getConf())
            .withInputTable(mTable.getURI())
            .withGatherer(SimpleGatherer.class)
            .withReducer(ReducerToHFile.class)
            .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(
                    mTable.getURI(), getLocalTestPath("hfile"), 10))
            .build();

    final Job hadoopJob = mrJob.getHadoopJob();
    final Configuration jobConf = hadoopJob.getConfiguration();

    // gatherer is recorded in the configuration; no combiner, the supplied reducer is used
    assertEquals(SimpleGatherer.class.getName(), jobConf.get(FijiConfKeys.FIJI_GATHERER_CLASS));
    assertEquals(null, hadoopJob.getCombinerClass());
    assertEquals(ReducerToHFile.class, hadoopJob.getReducerClass());
    assertEquals(10, hadoopJob.getNumReduceTasks());

    // output side of the job
    assertEquals(SequenceFileOutputFormat.class, hadoopJob.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, hadoopJob.getOutputKeyClass());
    assertEquals(NullWritable.class, hadoopJob.getOutputValueClass());
}

From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java

License:Apache License

/**
 * Registers, on the job's configuration, the jars containing the fixed dependencies
 * (ZooKeeper, protobuf, Guava) and the classes currently configured on the job, so that
 * JobClient will ship them to the cluster and add them to the DistributedCache.
 *
 * @param job the job whose configuration receives the dependency jars
 * @throws IOException if one of the job's configured classes cannot be found (wrapping the
 *         {@link ClassNotFoundException}), or if the delegate call fails
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        Class<?>[] requiredClasses = {
                // fixed library dependencies
                org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class,
                com.google.common.collect.ImmutableSet.class,
                // classes configured on the job
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass() };
        addDependencyJars(job.getConfiguration(), requiredClasses);
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}

From source file:com.yahoo.ycsb.bulk.hbase.BulkDataGeneratorJob.java

License:Apache License

/**
 * Parameters for bulk loader specified through the config file:
 *
 * - prefix for the row keys/* www .j a va 2s  . c o  m*/
 * - range start
 * - range end (inclusive)
 * - num splits (or number of partitions).
 * - user
 * - password
 * - table
 *
 * For the accepted default options
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 */
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    Util.printArgs("run", args, System.err);
    printKeyValues(conf, ARG_KEYS, System.err);

    if (args.length > 1 || (args.length == 1 && "-help".compareToIgnoreCase(args[0]) == 0)) {
        System.err.println("Usage: " + this.getClass().getName()
                + "input_path [generic options] [input_paths...] ouptut_path");
        GenericOptionsParser.printGenericCommandUsage(System.err);
        return 1;
    }

    // Time run
    long startTime = System.currentTimeMillis();
    String workdir;

    if (args.length == 1) {
        /* override workdir in the config if it is specified in the
         * command line
         */
        conf.set(ARG_KEY_OUTDIR, args[0]);
        workdir = args[0];
    }

    workdir = conf.get(ARG_KEY_OUTDIR);

    if (workdir == null) {
        System.err.println("No output directory specified");
        return 1;
    }

    /* Initialize job, check parameters and decide which mapper to use */
    Job job = new Job(conf, conf.get(ARG_KEY_JOBNAME, "YCSB KV data generator"));

    /* these settings are the same (i.e., fixed) independent of the
     * parameters */
    job.setJarByClass(this.getClass());
    // job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);

    /* these settings should depend on the type of output file */
    job.setOutputFormatClass(HFileOutputFormat.class);
    /* not sure the next two are needed */
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    this.createInputFile(job, workdir);

    HFileOutputFormat.setOutputPath(job, new Path(workdir + "/files"));

    /* depending on whether the keys need to be sorted and hashed, then
     * decide which mapper and reducer to use 
     */
    boolean hashKeys = conf.getBoolean(ARG_KEY_HASH_KEYS, false);
    boolean sortKeys = conf.getBoolean(ARG_KEY_SORTKEYS, true);

    /* get splits file name: side-effect -> this may generate a splits file  */
    String splitsfile = this.getSplitsFile(job, workdir);

    if (sortKeys && hashKeys) { /* do a full map reduce job */
        job.setMapperClass(RowGeneratorMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setPartitionerClass(RangePartitioner.class);

        if (splitsfile == null) {
            /* Auto generate the splits file either from:
             * - the input key ranges
             * - from the current table splits
             */
            throw new InvalidInputException("No splits specified");
        }

        /* Set splits file */
        RangePartitioner.setSplitFile(job, splitsfile);

        /* Add reducer (based on mapper code) */
        job.setReducerClass(RowGeneratorReduce.class);

        /* the number of reducers is dependent on the number of
         * partitions
         */
        int numReduce = conf.getInt(ARG_KEY_NUMREDUCE, 1);
        job.setNumReduceTasks(numReduce);
    } else { /* perform a map only job */
        job.setMapperClass(RowGeneratorMapOnly.class);
        /* map output key and value types are the same as
         * for the job
         */
        job.setMapOutputKeyClass(job.getOutputKeyClass());
        job.setMapOutputValueClass(job.getOutputValueClass());
        job.setNumReduceTasks(0);
    }

    job.waitForCompletion(true);

    //        JobClient.runJob(conf);
    SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss.SSS z");
    SimpleDateFormat ddf = new SimpleDateFormat("HH:mm:ss.SSS");
    ddf.setTimeZone(TimeZone.getTimeZone("UTC"));
    long endTime = System.currentTimeMillis();
    System.out.println("Start time (ms): " + df.format(new Date(startTime)) + " -- " + startTime);
    System.out.println("End time (ms): " + df.format(new Date(endTime)) + " -- " + endTime);
    System.out
            .println("Elapsed time (ms): " + ddf.format(endTime - startTime) + " -- " + (endTime - startTime));
    return 0;
}

From source file:gr.ntua.h2rdf.inputFormat2.TableMapReduceUtil.java

License:Open Source License

/**
 * Adds the HBase dependency jars, together with the jars of the classes currently configured
 * on the job, to the job configuration, so that JobClient will ship them to the cluster and
 * add them to the DistributedCache.
 *
 * @param job the job whose configuration receives the dependency jars
 * @throws IOException if one of the job's configured classes cannot be found (wrapping the
 *         {@link ClassNotFoundException}), or if the delegate call fails
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        Class<?>[] jarClasses = {
                org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class,
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass() };
        addDependencyJars(job.getConfiguration(), jarClasses);
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}