List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration
public Configuration getConfiguration()
From source file:com.bah.applefox.main.plugins.webcrawler.WebCrawler.java
License:Apache License
/**
 * Builds, configures and launches the crawl job, then blocks until it
 * finishes.
 *
 * <p>All settings are read positionally from {@code args} (in this case
 * loaded from a configuration file). Indices read by this method:
 * <ul>
 *   <li>{@code args[0]}, {@code args[1]} - handed to
 *       {@code setZooKeeperInstance} for both input and output (presumably
 *       the instance name and ZooKeeper hosts; confirm against the
 *       configuration file layout)</li>
 *   <li>{@code args[2]}, {@code args[3]} - handed to {@code setInputInfo}
 *       and {@code setOutputInfo}; {@code args[3]} is passed as bytes
 *       (presumably user name and password)</li>
 *   <li>{@code args[5]} - table used as job input and as the table given to
 *       {@code setOutputInfo}; also stored in {@code table}</li>
 *   <li>{@code args[6]} - stored in {@code userAgent}</li>
 *   <li>{@code args[12]} - prefix from which {@code table2} (suffix
 *       {@code "From"}) and {@code table3} (suffix {@code "To"}) are
 *       derived</li>
 *   <li>{@code args[24]} - passed to {@code AccumuloUtils.setSplitSize}</li>
 * </ul>
 *
 * <p>The job is map-only (zero reduce tasks) and its output format is
 * {@code NullOutputFormat}.
 *
 * @param args
 *            the command-line arguments (in this case from a configuration
 *            file)
 *
 * @return 0 if the job ran successfully, 1 otherwise
 */
public int run(String[] args) throws Exception {
    userAgent = args[6];

    // Job name is made unique per launch by appending the current time.
    final String jobName = getClass().getSimpleName() + "_" + System.currentTimeMillis();
    final Job job = new Job(getConf(), jobName);
    job.setJarByClass(getClass());

    final String sourceTable = args[5];
    final String linkTablePrefix = args[12];

    table = sourceTable;
    AccumuloUtils.setSplitSize(args[24]);
    table2 = linkTablePrefix + "From";
    table3 = linkTablePrefix + "To";

    final String instanceArg = args[0];
    final String zooKeeperArg = args[1];

    // Input side: scan the source table through Accumulo.
    job.setInputFormatClass(AccumuloInputFormat.class);
    InputFormatBase.setZooKeeperInstance(job.getConfiguration(), instanceArg, zooKeeperArg);
    InputFormatBase.setInputInfo(
            job.getConfiguration(), args[2], args[3].getBytes(), sourceTable, new Authorizations());

    // Map-only job: no reducers are scheduled.
    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);
    job.setNumReduceTasks(0);

    // Output side: the job's own output format is the null format, while the
    // Accumulo output settings are still written into the configuration.
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), instanceArg, zooKeeperArg);
    AccumuloOutputFormat.setOutputInfo(
            job.getConfiguration(), args[2], args[3].getBytes(), true, sourceTable);

    job.waitForCompletion(true);

    if (job.isSuccessful()) {
        return 0;
    }
    return 1;
}
From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java
License:Apache License
/**
 * Stores the Zookeeper connection string (required) in the job's
 * configuration under the key {@code kafka.zk.connect}.
 *
 * @param job
 *            the job being configured
 * @param zkConnect
 *            the Zookeeper connection string
 */
public static void setZkConnect(final Job job, final String zkConnect) {
    job.getConfiguration()
            .set("kafka.zk.connect", zkConnect);
}
From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java
License:Apache License
/**
 * Stores the Zookeeper session timeout for Kafka in the job's configuration
 * under the key {@code kafka.zk.session.timeout.ms}.
 *
 * @param job
 *            the job being configured
 * @param sessionTimeout
 *            the session timeout in milliseconds
 */
public static void setZkSessionTimeoutMs(final Job job, final int sessionTimeout) {
    job.getConfiguration()
            .setInt("kafka.zk.session.timeout.ms", sessionTimeout);
}
From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java
License:Apache License
/**
 * Stores the Zookeeper connection timeout in the job's configuration under
 * the key {@code kafka.zk.connection.timeout.ms}.
 *
 * @param job
 *            the job being configured
 * @param connectionTimeout
 *            the connection timeout in milliseconds
 */
public static void setZkConnectionTimeoutMs(final Job job, final int connectionTimeout) {
    job.getConfiguration()
            .setInt("kafka.zk.connection.timeout.ms", connectionTimeout);
}
From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java
License:Apache License
/**
 * Stores the Zookeeper root for Kafka in the job's configuration under the
 * key {@code kafka.zk.root}.
 *
 * @param job
 *            the job being configured
 * @param root
 *            the Zookeeper root path
 */
public static void setZkRoot(final Job job, final String root) {
    job.getConfiguration()
            .set("kafka.zk.root", root);
}
From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java
License:Apache License
/** * Sets the input topic (required)./*from ww w. j ava2s . co m*/ * * @param job * the job being configured * @param topic * the topic name */ public static void setTopic(final Job job, final String topic) { job.getConfiguration().set("kafka.topic", topic); }
From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java
License:Apache License
/**
 * Stores the consumer group of the input reader (required) in the job's
 * configuration under the key {@code kafka.groupid}.
 *
 * @param job
 *            the job being configured
 * @param consumerGroup
 *            the consumer group name
 */
public static void setConsumerGroup(final Job job, final String consumerGroup) {
    job.getConfiguration()
            .set("kafka.groupid", consumerGroup);
}
From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java
License:Apache License
/**
 * Restricts the input to partitions created <em>approximately</em> on or
 * after {@code timestamp}. The value is stored in the job's configuration
 * under the key {@code kafka.timestamp.offset}.
 * <p>
 * You are only guaranteed to receive all data on or after {@code timestamp};
 * <i>some</i> data from before the specified timestamp may be returned as
 * well.
 *
 * @param job
 *            the job being configured
 * @param timestamp
 *            the timestamp
 * @see SimpleConsumer#getOffsetsBefore
 */
public static void setIncludeOffsetsAfterTimestamp(final Job job, final long timestamp) {
    job.getConfiguration()
            .setLong("kafka.timestamp.offset", timestamp);
}
From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java
License:Apache License
/** * Limits the number of splits to create per partition. * <p>/*from w w w . j ava2s. co m*/ * Note that it if there more partitions to consume than {@code maxSplits}, the input format will take the * <em>earliest</em> Kafka partitions. * * @param job * the job to configure. * @param maxSplits * the maximum number of splits to create from each Kafka partition. */ public static void setMaxSplitsPerPartition(final Job job, final int maxSplits) { job.getConfiguration().setInt("kafka.max.splits.per.partition", maxSplits); }
From source file:com.baynote.kafka.hadoop.KafkaInputFormat.java
License:Apache License
/** * Sets the fetch size of the {@link RecordReader}. Note that your mapper should have enough memory allocation to * handle the specified size, or else you will likely throw {@link OutOfMemoryError}s. * /*from ww w . j a v a 2 s . c om*/ * @param job * the job being configured. * @param fetchSize * the fetch size (bytes). */ public static void setKafkaFetchSizeBytes(final Job job, final int fetchSize) { job.getConfiguration().setInt("kafka.fetch.size", fetchSize); }