Example usage for org.apache.hadoop.mapreduce.Job.getConfiguration()

Introduction

This page collects example usages of the getConfiguration() method of org.apache.hadoop.mapreduce.Job.

Prototype

public Configuration getConfiguration()

Source Link

Document

Return the configuration for the job.

Usage

From source file:com.junz.hadoop.custom.SytsLogInputFormat.java

License:Apache License

/**
 * Stores {@code number} in the job's configuration under
 * {@code NUMBER_MAP_PROPERTY}.
 *
 * @param job
 *            The job whose configuration is updated
 * @param number
 *            The value to record for the property
 */
public static void setNumberOfMaps(Job job, long number) {
    final String propertyName = NUMBER_MAP_PROPERTY;
    job.getConfiguration().setLong(propertyName, number);
}

From source file:com.jyz.study.hadoop.hbase.mapreduce.HFileOutputFormatBase.java

License:Apache License

/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 * <li>Sets the output key/value classes to {@code ImmutableBytesWritable} /
 * {@code KeyValue} and the output format to the supplied
 * {@code hfileOutputFormatBase} class</li>
 * <li>Sets the reducer up to perform the appropriate sorting
 * (KeyValueSortReducer, PutSortReducer or TextSortReducer), chosen from the
 * job's map output value class; any other class only produces a warning and
 * leaves the reducer untouched</li>
 * <li>Appends the mutation, result and key-value serializations to
 * {@code io.serializations}</li>
 * <li>Inspects the table to configure a total order partitioner and sets the
 * number of reduce tasks to match the current number of regions</li>
 * <li>Copies per-column-family compression, bloom type and block size
 * settings into the configuration</li>
 * <li>Adds dependency jars and initializes credentials for the job</li>
 * </ul>
 * The user should be sure to set the map output value class to KeyValue,
 * Put or Text before running this function.
 *
 * @param job
 *            The job to configure
 * @param table
 *            The table the incremental load targets
 * @param hfileOutputFormatBase
 *            The output format class to install on the job
 * @throws IOException
 *             if one of the underlying table or job-setup calls fails with
 *             an I/O error
 */
public static void configureIncrementalLoad(Job job, HTable table,
        Class<? extends HFileOutputFormatBase> hfileOutputFormatBase) throws IOException {
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(hfileOutputFormatBase);

    // Pick the sorting reducer that matches the configured map output value
    // class. The class is looked up only after the output classes above have
    // been set, exactly as before.
    // TODO it would be nice to pick one or the other of these formats.
    final Class<?> mapOutputValueClass = job.getMapOutputValueClass();
    if (KeyValue.class.equals(mapOutputValueClass)) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(mapOutputValueClass)) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(mapOutputValueClass)) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + mapOutputValueClass);
    }

    // Keep whatever serializations are already configured and append ours.
    Configuration conf = job.getConfiguration();
    String existingSerializations = conf.get("io.serializations");
    conf.setStrings("io.serializations", existingSerializations, MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    // The table's region start keys serve as the split points for the total
    // order partitioner, so there is one reduce task per region.
    final String tableName = Bytes.toString(table.getTableName());
    LOG.info("Looking up current regions for table " + tableName);
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    final int regionCount = startKeys.size();
    LOG.info("Configuring " + regionCount + " reduce partitions to match current region count");
    job.setNumReduceTasks(regionCount);
    configurePartitioner(job, startKeys);

    // Per-column-family settings: compression, bloom type and block size.
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + tableName + " output configured.");
}

From source file:com.knewton.mapreduce.io.SSTableInputFormat.java

License:Apache License

/**
 * Comparator class name for columns./*  w  w  w .java2s  .  c  o  m*/
 *
 * @param value
 *            The value of the property
 * @param job
 *            The current job
 */
public static void setComparatorClass(String value, Job job) {
    job.getConfiguration().set(PropertyConstants.COLUMN_COMPARATOR.txt, value);
}

From source file:com.knewton.mapreduce.io.SSTableInputFormat.java

License:Apache License

/**
 * This is not required if the column family type is standard.
 *
 * @param value/*from w  ww .j a  v  a2 s . co m*/
 *            The value of the property
 * @param job
 *            The current job
 */
public static void setSubComparatorClass(String value, Job job) {
    job.getConfiguration().set(PropertyConstants.COLUMN_SUBCOMPARATOR.txt, value);
}

From source file:com.knewton.mapreduce.io.SSTableInputFormat.java

License:Apache License

/**
 * Partitioner for decorating keys./*w ww  .  jav  a2s . co  m*/
 *
 * @param value
 *            The value of the property
 * @param job
 *            The current job
 */
public static void setPartitionerClass(String value, Job job) {
    job.getConfiguration().set(PropertyConstants.PARTITIONER.txt, value);
}

From source file:com.knewton.mapreduce.io.SSTableInputFormat.java

License:Apache License

/**
 * Sets the sparse column property. The flag is stored in the job's
 * configuration under {@code PropertyConstants.SPARSE_COLUMN}.
 *
 * @param value
 *            The value of the property
 * @param job
 *            The current job
 */
public static void setIsSparse(boolean value, Job job) {
    final String propertyName = PropertyConstants.SPARSE_COLUMN.txt;
    job.getConfiguration().setBoolean(propertyName, value);
}

From source file:com.knewton.mapreduce.io.SSTableInputFormat.java

License:Apache License

/**
 * Sets the column family type in the job's configuration under
 * {@code PropertyConstants.COLUMN_FAMILY_TYPE}. The column family type needs
 * to be set if the column family type is Super.
 *
 * @param value
 *            The value of the property
 * @param job
 *            The current job
 */
public static void setColumnFamilyType(String value, Job job) {
    final String propertyName = PropertyConstants.COLUMN_FAMILY_TYPE.txt;
    job.getConfiguration().set(propertyName, value);
}

From source file:com.knewton.mapreduce.io.SSTableInputFormat.java

License:Apache License

/**
 * Set the name of the column family to read. This is optional. If not set all the data tables
 * under the given input directory will be collected and processed.
 *
 * @param value/*from w  w w . j  av  a 2  s. c o  m*/
 *            The value of the property
 * @param job
 *            The current job
 */
public static void setColumnFamilyName(String value, Job job) {
    job.getConfiguration().set(PropertyConstants.COLUMN_FAMILY_NAME.txt, value);
}

From source file:com.knewton.mapreduce.io.SSTableInputFormat.java

License:Apache License

/**
 * Sets the name of the keyspace to read, stored in the job's configuration under
 * {@code PropertyConstants.KEYSPACE_NAME}. This is optional. If not set all the data tables
 * under the given input directory will be collected and processed.
 *
 * @param value
 *            The value of the property
 * @param job
 *            The current job
 */
public static void setKeyspaceName(String value, Job job) {
    final String propertyName = PropertyConstants.KEYSPACE_NAME.txt;
    job.getConfiguration().set(propertyName, value);
}

From source file:com.knewton.mapreduce.io.SSTableInputFormatTest.java

License:Apache License

/**
 * Tests to see if when given an input directory the {@link SSTableInputFormat} correctly
 * expands all sub directories and picks up all the data tables corresponding to a specific
 * column family./* w w  w .  ja  v a  2  s. c o m*/
 */
@Test
public void testListStatusWithColumnFamilyName() throws IOException {
    String cfName = "col_fam";
    Job job = Job.getInstance(new Configuration(false));
    Configuration conf = job.getConfiguration();
    SSTableInputFormat.setColumnFamilyName(cfName, job);
    List<FileStatus> result = testListStatus(conf, "./src/test/resources/input/");
    assertEquals(NUM_TABLES * NUM_TOKENS, result.size());
}