Example usage for org.apache.hadoop.mapreduce Job getConfiguration

List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job getConfiguration.

Prototype

public Configuration getConfiguration() 

Document

Return the configuration for the job.
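
As a quick orientation before the examples, the sketch below shows the typical pattern: changes made through the Configuration returned by getConfiguration() apply to the job, while the Configuration originally passed in is copied and left untouched. The class name and the property key are illustrative only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Job.getInstance copies conf, so later changes must go through job.getConfiguration()
        Job job = Job.getInstance(conf, "example-job");
        Configuration jobConf = job.getConfiguration();
        jobConf.set("example.custom.property", "value"); // illustrative key, not a real Hadoop setting
        System.out.println(jobConf.get("example.custom.property"));
    }
}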

Usage

From source file:com.cloudy.mapred.base.JobUtil.java

License:Apache License

public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf)
        throws IOException {

    Job job = new Job(new Configuration(conf));
    Configuration jobConf = job.getConfiguration();

    if (mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
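
A hedged usage sketch for the map-only prepareJob variant above, assuming a user-supplied MyMapper class and illustrative paths; SequenceFileInputFormat and SequenceFileOutputFormat are the standard classes from org.apache.hadoop.mapreduce.lib.

// Hypothetical caller of JobUtil.prepareJob (map-only variant); MyMapper and the paths are placeholders.
public static boolean runMapOnlyJob(Configuration conf) throws Exception {
    Job job = JobUtil.prepareJob(
            new Path("/tmp/example/input"), new Path("/tmp/example/output"),
            SequenceFileInputFormat.class, MyMapper.class,
            Text.class, IntWritable.class,
            SequenceFileOutputFormat.class, conf);
    return job.waitForCompletion(true);
}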

From source file:com.cloudy.mapred.base.JobUtil.java

License:Apache License

/**
 * Create a map and reduce Hadoop job.  Does not set the name on the job.
 * @param inputPath The input {@link org.apache.hadoop.fs.Path}
 * @param outputPath The output {@link org.apache.hadoop.fs.Path}
 * @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat}
 * @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use
 * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class.  If the Mapper is a no-op,
 *                  this value may be null
 * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class.  If the Mapper is a no-op,
 *                    this value may be null
 * @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use
 * @param reducerKey The reducer key class.
 * @param reducerValue The reducer value class.
 * @param outputFormat The {@link org.apache.hadoop.mapreduce.OutputFormat}.
 * @param conf The {@link org.apache.hadoop.conf.Configuration} to use.
 * @return The {@link org.apache.hadoop.mapreduce.Job}.
 * @throws IOException if there is a problem with the IO.
 *
 * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
 * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class,
 * org.apache.hadoop.conf.Configuration)
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    //    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}

From source file:com.conductor.hadoop.WritableValueInputFormat.java

License:Apache License

/**
 * Writes the provided {@code values} to an input file to be read by the {@link Job}, and sets up all additional
 * necessary configuration.
 * 
 * @param values
 *            the values to be read by the job.
 * @param clazz
 *            the type of the values.
 * @param inputsPerSplit
 *            how many inputs each split gets
 * @param job
 *            the job to configure
 * @param <V>
 *            the type of the {@code values}
 * @throws IOException
 */
public static <V extends Writable> void setupInput(final List<V> values, Class<V> clazz,
        final int inputsPerSplit, final Job job) throws IOException {
    final Path inputPath = new Path("job_input_" + System.currentTimeMillis() + UUID.randomUUID().toString());
    final Writer writer = SequenceFile.createWriter(FileContext.getFileContext(job.getConfiguration()),
            job.getConfiguration(), inputPath, NullWritable.class, clazz, CompressionType.NONE, CODEC,
            new Metadata(), EnumSet.of(CreateFlag.CREATE), DUMMY_VAR_ARGS);
    doSetupInput(values, clazz, inputsPerSplit, job, inputPath, writer);
}
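
A hedged usage sketch of setupInput, assuming the values fit in memory and that WritableValueInputFormat is also set as the job's input format so the splits written above are read back; the value list and split size are illustrative.

// Hypothetical caller: feed an in-memory list of writables to a job as its input.
public static void configureListInput(Job job) throws IOException {
    List<IntWritable> values = new ArrayList<>();
    for (int i = 0; i < 100; i++) {
        values.add(new IntWritable(i));
    }
    // 10 values per input split (illustrative)
    WritableValueInputFormat.setupInput(values, IntWritable.class, 10, job);
    job.setInputFormatClass(WritableValueInputFormat.class);
}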

From source file:com.conductor.hadoop.WritableValueInputFormat.java

License:Apache License

@VisibleForTesting
static <V extends Writable> void doSetupInput(final List<V> values, final Class<V> clazz,
        final int inputsPerSplit, final Job job, final Path inputPath, final Writer writer) throws IOException {
    job.getConfiguration().setClass(VALUE_TYPE_CONF, clazz, Writable.class);
    job.getConfiguration().setInt(INPUTS_PER_SPLIT_CONF, inputsPerSplit);
    job.getConfiguration().set(INPUT_FILE_LOCATION_CONF, inputPath.toString());

    // write each value to the sequence file
    int syncCounter = 0;
    for (final V input : values) {
        // each entry in the sequence file is a map input
        writer.append(NullWritable.get(), input);
        // syncing indicates an input split boundary
        if (++syncCounter % inputsPerSplit == 0) {
            writer.sync();
        }
    }
    // close the input file
    writer.hflush();
    writer.close();

    // delete file when JVM exits
    inputPath.getFileSystem(job.getConfiguration()).deleteOnExit(inputPath);
}

From source file:com.conductor.kafka.hadoop.KafkaRecordReaderTest.java

License:Apache License

@Before
public void setUp() throws Exception {
    conf = new Configuration(false);
    when(context.getConfiguration()).thenReturn(conf);
    final Job job = mock(Job.class);
    when(job.getConfiguration()).thenReturn(conf);

    KafkaInputFormat.setConsumerGroup(job, "group");
    KafkaInputFormat.setKafkaSocketTimeoutMs(job, 1000);
    KafkaInputFormat.setKafkaBufferSizeBytes(job, 4096);
    KafkaInputFormat.setKafkaFetchSizeBytes(job, 2048);

    final Broker broker = new Broker("localhost", 9092, 1);
    this.partition = new Partition("topic", 0, broker);
    split = new KafkaInputSplit(partition, 0, 100, true);

    reader = spy(new KafkaRecordReader());
    reader.initialize(split, context);
}

From source file:com.conversantmedia.mapreduce.io.avro.MultiSchemaAvroSerialization.java

License:Apache License

/**
 * Register the schemas this serializer will ser/deser to/from.
 * @param job      the job to be configured
 * @param schemas   list of schemas to register (Will assign internal indices
 *          based on the order they're provided.)
 */
public static void registerSchemas(Job job, Schema... schemas) {
    String[] names = new String[schemas.length];
    int idx = 0;
    for (Schema schema : schemas) {
        names[idx++] = schema.getFullName();
    }
    job.getConfiguration().setStrings(CONF_KEY_MULTI_SCHEMAS, names);

    registerSerialization(job);
}
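
A hedged sketch of calling registerSchemas; the two record schemas are built inline with Avro's SchemaBuilder purely for illustration (in practice they would usually come from generated record classes).

// Hypothetical registration of two Avro record schemas on a job.
public static void registerExampleSchemas(Job job) {
    Schema userSchema = SchemaBuilder.record("User").fields().requiredString("name").endRecord();
    Schema eventSchema = SchemaBuilder.record("Event").fields().requiredLong("timestamp").endRecord();
    // Internal indices are assigned in the order the schemas are passed.
    MultiSchemaAvroSerialization.registerSchemas(job, userSchema, eventSchema);
}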

From source file:com.conversantmedia.mapreduce.io.avro.MultiSchemaAvroSerialization.java

License:Apache License

/**
 * Add this class to the list of serializers.
 * @param job   the job for registering serialization 
 */
public static void registerSerialization(Job job) {
    String[] strings = job.getConfiguration().getStrings("io.serializations");
    String[] newStrings = new String[strings.length + 1];
    System.arraycopy(strings, 0, newStrings, 0, strings.length);
    newStrings[newStrings.length - 1] = MultiSchemaAvroSerialization.class.getName();
    job.getConfiguration().setStrings("io.serializations", newStrings);

}

From source file:com.conversantmedia.mapreduce.io.CompositeSortKeySerialization.java

License:Apache License

/**
 * Convenience method to configure the job for using the composite key.
 * @param job            the job using this serializer
 * @param groupKeyClass      the key type used for grouping
 * @param sortKeyClass      the key type used for sorting
 */
@SuppressWarnings("rawtypes")
public static void configureMapOutputKey(Job job, Class<? extends WritableComparable> groupKeyClass,
        Class<? extends WritableComparable> sortKeyClass) {

    // First, setup our classes...
    job.getConfiguration().set(CONF_KEY_GROUPKEY_CLASS, groupKeyClass.getName());
    job.getConfiguration().set(CONF_KEY_SORTKEY_CLASS, sortKeyClass.getName());

    // Set this class as our map output key
    job.setMapOutputKeyClass(CompositeSortKey.class);

    // Setup the partitioner and comparators.
    job.setPartitionerClass(CompositeSortKey.KeyPartitioner.class);
    job.setGroupingComparatorClass(CompositeSortKey.GroupingComparator.class);
    job.setSortComparatorClass(CompositeSortKey.NaturalSortComparator.class);

    // Now setup the serialization by registering with the framework.
    Collection<String> serializations = new ArrayList<>();
    serializations.add(CompositeSortKeySerialization.class.getName());
    serializations.addAll(job.getConfiguration().getStringCollection("io.serializations"));
    job.getConfiguration().setStrings("io.serializations",
            serializations.toArray(new String[serializations.size()]));

}

From source file:com.conversantmedia.mapreduce.io.CompositeSortKeyTest.java

License:Apache License

@Before
public void setup() {
    TestMapper mapper = new TestMapper();
    TestReducer reducer = new TestReducer();
    driver = MapReduceDriver.newMapReduceDriver(mapper, reducer);

    Configuration conf = driver.getConfiguration();
    Job job = mock(Job.class);
    when(job.getConfiguration()).thenReturn(conf);

    CompositeSortKeySerialization.configureMapOutputKey(job, Text.class, IntWritable.class);

    // MRUnit sets these differently than standard MapReduce:
    driver.setKeyGroupingComparator(new CompositeSortKey.GroupingComparator<Text, IntWritable>());
}

From source file:com.conversantmedia.mapreduce.tool.annotation.handler.AvroJobInfoAnnotationHandler.java

License:Apache License

@Override
public void process(Annotation annotation, Job job, Object target) throws ToolException {

    AvroJobInfo avroInfo = (AvroJobInfo) annotation;
    if (avroInfo.inputKeySchema() != AvroDefault.class) {
        AvroJob.setInputKeySchema(job, getSchema(avroInfo.inputKeySchema()));
    }
    if (avroInfo.inputValueSchema() != AvroDefault.class) {
        AvroJob.setInputValueSchema(job, getSchema(avroInfo.inputValueSchema()));
    }

    if (avroInfo.outputKeySchema() != AvroDefault.class) {
        AvroJob.setOutputKeySchema(job, getSchema(avroInfo.outputKeySchema()));
    }
    if (avroInfo.outputValueSchema() != AvroDefault.class) {
        AvroJob.setOutputValueSchema(job, getSchema(avroInfo.outputValueSchema()));
    }

    if (avroInfo.mapOutputKeySchema() != AvroDefault.class) {
        AvroJob.setMapOutputKeySchema(job, getSchema(avroInfo.mapOutputKeySchema()));
    }
    if (avroInfo.mapOutputValueSchema() != AvroDefault.class) {
        AvroJob.setMapOutputValueSchema(job, getSchema(avroInfo.mapOutputValueSchema()));
    }

    AvroSerialization.addToConfiguration(job.getConfiguration());
}