Example usage for org.apache.hadoop.mapreduce Job getMapOutputValueClass

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.Job#getMapOutputValueClass from open-source projects.

Prototype

public Class<?> getMapOutputValueClass() 

Document

Get the value class for the map output data. If it is not set, use the (final) output value class. This allows the map output value class to be different than the final output value class.
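
The fallback makes the getter safe to call even when the driver never set an intermediate value class explicitly. A minimal, self-contained sketch of that behavior (the job name and value classes are chosen purely for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class MapOutputValueClassDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "demo");
        job.setOutputValueClass(IntWritable.class);

        // Not set explicitly yet: falls back to the (final) output value class.
        System.out.println(job.getMapOutputValueClass()); // class org.apache.hadoop.io.IntWritable

        // Once overridden, the intermediate type may differ from the final one.
        job.setMapOutputValueClass(Text.class);
        System.out.println(job.getMapOutputValueClass()); // class org.apache.hadoop.io.Text
    }
}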

Usage

From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormat2.java

License:Apache License

static void configureIncrementalLoad(Job job, HTable table, Class<? extends OutputFormat<?, ?>> cls)
        throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName()));
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured.");
}
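
The reducer dispatch above keys entirely on the driver's declared intermediate value type, so getMapOutputValueClass() must already return the right class when configureIncrementalLoad runs. A minimal driver-side sketch (the mapper class is hypothetical; conf and table are assumed to exist):

Job job = Job.getInstance(conf, "hfile-bulk-load");
job.setMapperClass(PutEmittingMapper.class); // hypothetical mapper that emits Put values
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class); // makes the dispatch above select PutSortReducer
HFileOutputFormat2.configureIncrementalLoad(job, table, HFileOutputFormat2.class);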

From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java

License:Apache License

public static void configureIncrementalLoad(Job job, HTable table, Class<? extends OutputFormat<?, ?>> cls)
        throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormatBase.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName()));
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    // TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured.");
}

From source file:com.savy3.nonequijoin.MapOutputSampler.java

License:Apache License

/**
 * Driver for InputSampler MapReduce Job
 */
public static void runMap(Job job, Path sampleInputPath)
        throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException {
    LOG.info("Running a MapReduce Job on Sample Input File" + sampleInputPath.toString());

    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.job.ubertask.enable", true);
    conf.set("numSamples", "" + (job.getNumReduceTasks() - 1));
    Job sampleJob = new Job(conf);
    sampleJob.setMapperClass(job.getMapperClass());
    sampleJob.setReducerClass(SampleKeyReducer.class);
    sampleJob.setJarByClass(job.getMapperClass());
    sampleJob.setMapOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setMapOutputValueClass(job.getMapOutputValueClass());
    sampleJob.setOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setOutputValueClass(NullWritable.class);
    sampleJob.setInputFormatClass(SequenceFileInputFormat.class);
    sampleJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileInputFormat.addInputPath(sampleJob, sampleInputPath);
    FileSystem fs = FileSystem.get(conf);

    Path out = new Path(sampleInputPath.getParent(), "mapOut");
    fs.delete(out, true);

    SequenceFileOutputFormat.setOutputPath(sampleJob, out);

    sampleJob.waitForCompletion(true);

    LOG.info("Sample MapReduce Job Output File" + out.toString());

    Path partFile = new Path(out, "part-r-00000");
    Path tmpFile = new Path("/_tmp");
    fs.delete(tmpFile, true);
    fs.rename(partFile, tmpFile);
    fs.delete(sampleInputPath.getParent(), true);
    fs.rename(new Path("/_tmp"), sampleInputPath.getParent());

    LOG.info("Sample partitioning file cpied to location " + sampleInputPath.getParent().toString());
}

From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java

License:Apache License

/**
 * Add the dependency jars as well as jars for any of the configured
 * job classes to the job configuration, so that JobClient will ship them
 * to the cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(), org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class, com.google.common.collect.ImmutableSet.class,
                job.getMapOutputKeyClass(), job.getMapOutputValueClass(), job.getInputFormatClass(),
                job.getOutputKeyClass(), job.getOutputValueClass(), job.getOutputFormatClass(),
                job.getPartitionerClass(), job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
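
Because this wrapper reads getMapOutputKeyClass(), getMapOutputValueClass() and the other class accessors from the job, the driver must configure those classes before calling it; otherwise the jars shipped to the DistributedCache are computed from the defaults. A minimal caller sketch (the job wiring is illustrative):

Job job = Job.getInstance(conf, "splice-mr");
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);
// Now the containing jars of the classes above can be resolved and shipped.
SpliceTableMapReduceUtil.addDependencyJars(job);

The ClassNotFoundException catch is needed because accessors such as getInputFormatClass() and getCombinerClass() resolve class names from the configuration lazily and declare that exception.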

From source file:gr.ntua.h2rdf.inputFormat2.TableMapReduceUtil.java

License:Open Source License

/**
 * Add the HBase dependency jars as well as jars for any of the configured
 * job classes to the job configuration, so that JobClient will ship them
 * to the cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(), org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class, job.getMapOutputKeyClass(), job.getMapOutputValueClass(),
                job.getInputFormatClass(), job.getOutputKeyClass(), job.getOutputValueClass(),
                job.getOutputFormatClass(), job.getPartitionerClass(), job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}

From source file:org.apache.blur.mapreduce.lib.BlurMapReduceUtil.java

License:Apache License

/**
 * Add the Blur dependency jars as well as jars for any of the configured job
 * classes to the job configuration, so that JobClient will ship them to the
 * cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(), org.apache.zookeeper.ZooKeeper.class,
                job.getMapOutputKeyClass(), job.getMapOutputValueClass(), job.getInputFormatClass(),
                job.getOutputKeyClass(), job.getOutputValueClass(), job.getOutputFormatClass(),
                job.getPartitionerClass(), job.getCombinerClass(), DocumentVisibility.class);
        addAllJarsInBlurLib(job.getConfiguration());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}

From source file:org.apache.kudu.mapreduce.KuduTableMapReduceUtil.java

License:Apache License

/**
 * Add the Kudu dependency jars as well as jars for any of the configured
 * job classes to the job configuration, so that JobClient will ship them
 * to the cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    addKuduDependencyJars(job.getConfiguration());
    try {
        addDependencyJars(job.getConfiguration(),
                // when making changes here, consider also mapred.TableMapReduceUtil
                // pull job classes
                job.getMapOutputKeyClass(), job.getMapOutputValueClass(), job.getInputFormatClass(),
                job.getOutputKeyClass(), job.getOutputValueClass(), job.getOutputFormatClass(),
                job.getPartitionerClass(), job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}

From source file:org.apache.kylin.storage.hbase.steps.HFileOutputFormat3.java

License:Apache License

static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator,
        Class<? extends OutputFormat<?, ?>> cls) throws IOException, UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(cls);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + tableDescriptor.getTableName());
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
}

From source file:org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java

License:Apache License

/**
 * Add the HBase dependency jars as well as jars for any of the configured job
 * classes to the job configuration, so that JobClient will ship them to the
 * cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(), org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class, com.google.common.collect.ImmutableSet.class,
                org.apache.hadoop.hbase.util.Bytes.class, // one class from hbase.jar
                job.getMapOutputKeyClass(), job.getMapOutputValueClass(), job.getInputFormatClass(),
                job.getOutputKeyClass(), job.getOutputValueClass(), job.getOutputFormatClass(),
                job.getPartitionerClass(), job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}

From source file:org.kiji.mapreduce.framework.MapReduceJobBuilder.java

License:Apache License

/**
 * Configures the job with any Avro reader or writer schemas specified by the mapper class.
 *
 * <p>If the job's mapper class uses AvroKey as the job's input key class, it should
 * have implemented the AvroKeyReader interface to specify the reader schema for the
 * input key.  Likewise, if it uses AvroValue as the job's input value class, it should
 * have implemented the AvroValueReader interface.</p>
 *
 * <p>If the job's mapper class uses AvroKey as the output key class, it should
 * have implemented the AvroKeyWriter interface to specify the writer schema for the
 * output key.  Likewise, if it uses AvroValue as the output value class, it should have
 * implemented the AvroValueWriter interface.</p>
 *
 * <p>This method makes sure those interfaces were implemented correctly, uses them to
 * fetch the reader/writer schemas as necessary, and sets them in the Job configuration
 * so the Avro input format and serialization framework can access them.</p>
 *
 * @param job The job to configure.
 * @param mapper The Kiji mapper the job is configured to run.
 * @throws IOException If the Avro schemas cannot be configured.
 */
protected void configureAvro(Job job, KijiMapper<?, ?, ?, ?> mapper) throws IOException {
    // If the user has specified particular reader schemas for the records of the input,
    // put it in the job configuration.
    Schema inputKeyReaderSchema = AvroMapReduce.getAvroKeyReaderSchema(mapper);
    if (null != inputKeyReaderSchema) {
        LOG.info("Setting reader schema for the map input key to: " + inputKeyReaderSchema);
        AvroJob.setInputKeySchema(job, inputKeyReaderSchema);
    }
    Schema inputValueReaderSchema = AvroMapReduce.getAvroValueReaderSchema(mapper);
    if (null != inputValueReaderSchema) {
        LOG.info("Setting reader schema for the map input value to: " + inputValueReaderSchema);
        AvroJob.setInputValueSchema(job, inputValueReaderSchema);
    }

    // Set the output writer schemas in the job configuration (if specified).
    Schema outputKeyWriterSchema = AvroMapReduce.getAvroKeyWriterSchema(mapper);
    if (null != outputKeyWriterSchema) {
        if (!AvroKey.class.isAssignableFrom(job.getMapOutputKeyClass())) {
            throw new JobConfigurationException(
                    mapper.getClass().getName() + ".getAvroKeyWriterSchema() returned a non-null Schema"
                            + " but the output key class was not AvroKey.");
        }
        LOG.info("Setting avro serialization for map output key schema: " + outputKeyWriterSchema);
        AvroJob.setMapOutputKeySchema(job, outputKeyWriterSchema);
    }
    Schema outputValueWriterSchema = AvroMapReduce.getAvroValueWriterSchema(mapper);
    if (null != outputValueWriterSchema) {
        if (!AvroValue.class.isAssignableFrom(job.getMapOutputValueClass())) {
            throw new JobConfigurationException(
                    mapper.getClass().getName() + ".getAvroValueWriterSchema() returned a non-null Schema"
                            + " but the output value class was not AvroValue.");
        }
        LOG.info("Setting avro serialization for map output value schema: " + outputValueWriterSchema);
        AvroJob.setMapOutputValueSchema(job, outputValueWriterSchema);
    }
}
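
Outside of Kiji, the same intermediate-schema wiring that configureAvro automates can be done directly with avro-mapred. A minimal sketch under that assumption (the record schema, field names, and job name are hypothetical):

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical writer schema for the map output values.
Schema valueSchema = SchemaBuilder.record("Rating")
        .fields().requiredLong("userId").requiredDouble("score").endRecord();

Job job = Job.getInstance(conf, "avro-intermediate");
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(AvroValue.class);
// Registers the writer schema so the Avro serialization framework can
// encode AvroValue records between the map and reduce phases.
AvroJob.setMapOutputValueSchema(job, valueSchema);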