Example usage for org.apache.hadoop.mapreduce Job getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.getConfiguration().

Prototype

public Configuration getConfiguration()

Source Link

Document

Return the configuration for the job.

Usage

From source file:com.bah.applefox.main.plugins.webcrawler.WebCrawler.java

License:Apache License

/**
 * Creates, configures and runs the crawler job from the given commandline
 * arguments (in this case read from a configuration file), then blocks until
 * the job has finished.
 * <p>
 * Argument positions read by this method:
 * <ul>
 * <li>{@code args[0]}, {@code args[1]} - passed to
 * {@code setZooKeeperInstance} for both the input and the output format</li>
 * <li>{@code args[2]}, {@code args[3]} - passed to {@code setInputInfo} and
 * {@code setOutputInfo}; {@code args[3]} is handed over as UTF-8 bytes
 * (presumably user name and password - confirm against the Accumulo API in
 * use)</li>
 * <li>{@code args[5]} - table name used for input and as the table argument
 * of {@code setOutputInfo}; also stored in {@code table}</li>
 * <li>{@code args[6]} - stored in {@code userAgent}</li>
 * <li>{@code args[12]} - prefix for {@code table2} (suffix {@code "From"})
 * and {@code table3} (suffix {@code "To"})</li>
 * <li>{@code args[24]} - passed to {@code AccumuloUtils.setSplitSize}</li>
 * </ul>
 *
 * @param args
 *            the commandline arguments (in this case from a configuration
 *            file); at least 25 entries are required
 *
 * @return 0 if the job ran successfully and 1 if it did not
 * @throws IllegalArgumentException
 *             if {@code args} is {@code null} or has fewer than 25 entries
 * @throws Exception
 *             if job setup or execution fails
 */
public int run(String[] args) throws Exception {

    // The highest index read below is args[24]. Fail fast, before any field
    // is mutated or a Job is created, instead of surfacing an
    // ArrayIndexOutOfBoundsException part-way through setup.
    final int requiredArgs = 25;
    if (args == null || args.length < requiredArgs) {
        throw new IllegalArgumentException("Expected at least " + requiredArgs + " arguments but got "
                + (args == null ? 0 : args.length));
    }

    userAgent = args[6];

    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    String clone = args[5];
    String clone2 = args[12];
    table = clone;

    AccumuloUtils.setSplitSize(args[24]);
    table2 = clone2 + "From";
    table3 = clone2 + "To";

    // Encode once with an explicit charset so the bytes do not depend on the
    // platform default of whichever host submits the job. The fully
    // qualified name avoids requiring a new import in this file.
    final byte[] credentialBytes = args[3].getBytes(java.nio.charset.StandardCharsets.UTF_8);

    job.setInputFormatClass(AccumuloInputFormat.class);
    InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    InputFormatBase.setInputInfo(job.getConfiguration(), args[2], credentialBytes, clone,
            new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    // Map-only job: no reducers, and the job-level output format is
    // NullOutputFormat.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], credentialBytes, true, clone);

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}

From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java

License:Apache License

/**
 * Stores the Zookeeper connection string in the job's configuration under
 * the {@code kafka.zk.connect} property. This setting is required.
 *
 * @param job
 *            the job being configured
 * @param zkConnect
 *            zookeeper connection string.
 */
public static void setZkConnect(final Job job, final String zkConnect) {
    final String property = "kafka.zk.connect";
    job.getConfiguration().set(property, zkConnect);
}

From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java

License:Apache License

/**
 * Stores the Zookeeper session timeout for Kafka in the job's configuration
 * under the {@code kafka.zk.session.timeout.ms} property.
 *
 * @param job
 *            the job being configured.
 * @param sessionTimeout
 *            the session timeout in milliseconds.
 */
public static void setZkSessionTimeoutMs(final Job job, final int sessionTimeout) {
    final String property = "kafka.zk.session.timeout.ms";
    job.getConfiguration().setInt(property, sessionTimeout);
}

From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java

License:Apache License

/**
 * Stores the Zookeeper connection timeout in the job's configuration under
 * the {@code kafka.zk.connection.timeout.ms} property.
 *
 * @param job
 *            the job being configured.
 * @param connectionTimeout
 *            the connection timeout in milliseconds.
 */
public static void setZkConnectionTimeoutMs(final Job job, final int connectionTimeout) {
    final String property = "kafka.zk.connection.timeout.ms";
    job.getConfiguration().setInt(property, connectionTimeout);
}

From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java

License:Apache License

/**
 * Stores the Zookeeper root for Kafka in the job's configuration under the
 * {@code kafka.zk.root} property.
 *
 * @param job
 *            the job being configured.
 * @param root
 *            the zookeeper root path.
 */
public static void setZkRoot(final Job job, final String root) {
    final String property = "kafka.zk.root";
    job.getConfiguration().set(property, root);
}

From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java

License:Apache License

/**
 * Sets the input topic (required)./*from  ww  w.  j ava2s  .  co  m*/
 * 
 * @param job
 *            the job being configured
 * @param topic
 *            the topic name
 */
public static void setTopic(final Job job, final String topic) {
    job.getConfiguration().set("kafka.topic", topic);
}

From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java

License:Apache License

/**
 * Stores the consumer group of the input reader in the job's configuration
 * under the {@code kafka.groupid} property. This setting is required.
 *
 * @param job
 *            the job being configured.
 * @param consumerGroup
 *            consumer group name.
 */
public static void setConsumerGroup(final Job job, final String consumerGroup) {
    final String property = "kafka.groupid";
    job.getConfiguration().set(property, consumerGroup);
}

From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java

License:Apache License

/**
 * Restricts the input to partitions created <em>approximately</em> on or after {@code timestamp}. The value is
 * stored in the job's configuration under the {@code kafka.timestamp.offset} property.
 * <p>
 * You are only guaranteed to receive all data on or after {@code timestamp}; <i>some</i> data from before the
 * specified timestamp may be returned as well.
 *
 * @param job
 *            the job being configured.
 * @param timestamp
 *            the timestamp.
 * @see SimpleConsumer#getOffsetsBefore
 */
public static void setIncludeOffsetsAfterTimestamp(final Job job, final long timestamp) {
    final String property = "kafka.timestamp.offset";
    job.getConfiguration().setLong(property, timestamp);
}

From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java

License:Apache License

/**
 * Limits the number of splits to create per partition.
 * <p>/*from  w  w  w  . j ava2s. co  m*/
 * Note that it if there more partitions to consume than {@code maxSplits}, the input format will take the
 * <em>earliest</em> Kafka partitions.
 * 
 * @param job
 *            the job to configure.
 * @param maxSplits
 *            the maximum number of splits to create from each Kafka partition.
 */
public static void setMaxSplitsPerPartition(final Job job, final int maxSplits) {
    job.getConfiguration().setInt("kafka.max.splits.per.partition", maxSplits);
}

From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java

License:Apache License

/**
 * Sets the fetch size of the {@link RecordReader}. Note that your mapper should have enough memory allocation to
 * handle the specified size, or else you will likely throw {@link OutOfMemoryError}s.
 * /*from  ww  w .  j  a v  a 2 s  . c om*/
 * @param job
 *            the job being configured.
 * @param fetchSize
 *            the fetch size (bytes).
 */
public static void setKafkaFetchSizeBytes(final Job job, final int fetchSize) {
    job.getConfiguration().setInt("kafka.fetch.size", fetchSize);
}