Usage examples for org.apache.hadoop.mapreduce.Job#getConfiguration()
public Configuration getConfiguration()
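The method returns the live Configuration backing the Job; properties set on it before submission are what the tasks see. A minimal sketch, with illustrative property values and an illustrative output path:

// Obtain the job-backing Configuration and tune it before submission.
Job job = Job.getInstance(new Configuration(), "example-job");
Configuration jobConf = job.getConfiguration();
jobConf.setBoolean("mapreduce.map.output.compress", true);
jobConf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp/example-out");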
From source file: com.cloudy.mapred.base.JobUtil.java
License: Apache License

public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat,
        Configuration conf) throws IOException {

    Job job = new Job(new Configuration(conf));
    Configuration jobConf = job.getConfiguration();

    if (mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
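A hypothetical call to this map-only variant; the paths are placeholders and MyMapper stands in for a real Mapper subclass (passing Mapper.class itself would trigger the IllegalStateException above):

Job job = JobUtil.prepareJob(new Path("/data/in"), new Path("/data/out"),
        TextInputFormat.class, MyMapper.class, Text.class, IntWritable.class,
        TextOutputFormat.class, new Configuration());
job.waitForCompletion(true);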
From source file: com.cloudy.mapred.base.JobUtil.java
License: Apache License

/**
 * Create a map and reduce Hadoop job. Does not set the name on the job.
 *
 * @param inputPath    The input {@link org.apache.hadoop.fs.Path}
 * @param outputPath   The output {@link org.apache.hadoop.fs.Path}
 * @param inputFormat  The {@link org.apache.hadoop.mapreduce.InputFormat}
 * @param mapper       The {@link org.apache.hadoop.mapreduce.Mapper} class to use
 * @param mapperKey    The {@link org.apache.hadoop.io.Writable} key class. If the Mapper is a no-op,
 *                     this value may be null
 * @param mapperValue  The {@link org.apache.hadoop.io.Writable} value class. If the Mapper is a no-op,
 *                     this value may be null
 * @param reducer      The {@link org.apache.hadoop.mapreduce.Reducer} to use
 * @param reducerKey   The reducer key class.
 * @param reducerValue The reducer value class.
 * @param outputFormat The {@link org.apache.hadoop.mapreduce.OutputFormat}.
 * @param conf         The {@link org.apache.hadoop.conf.Configuration} to use.
 * @return The {@link org.apache.hadoop.mapreduce.Job}.
 * @throws IOException if there is a problem with the IO.
 *
 * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
 * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class,
 *      org.apache.hadoop.conf.Configuration)
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    // jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
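The map-and-reduce variant is driven the same way; a sketch with placeholder mapper and reducer classes, setting the job name that prepareJob deliberately leaves unset:

Job job = JobUtil.prepareJob(new Path("/data/in"), new Path("/data/out"),
        TextInputFormat.class,
        TokenizerMapper.class, Text.class, IntWritable.class,
        WordCountReducer.class, Text.class, IntWritable.class,
        TextOutputFormat.class, new Configuration());
job.setJobName("word-count"); // prepareJob does not set a name
job.waitForCompletion(true);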
From source file: com.conductor.hadoop.WritableValueInputFormat.java
License: Apache License

/**
 * Writes the provided {@code values} to an input file to be read by the {@link Job}, and sets up all
 * additional necessary configuration.
 *
 * @param values         the values to be read by the job.
 * @param clazz          the type of the values.
 * @param inputsPerSplit how many inputs each split gets.
 * @param job            the job to configure.
 * @param <V>            the type of the {@code values}.
 * @throws IOException
 */
public static <V extends Writable> void setupInput(final List<V> values, Class<V> clazz,
        final int inputsPerSplit, final Job job) throws IOException {
    final Path inputPath = new Path("job_input_" + System.currentTimeMillis() + UUID.randomUUID().toString());
    final Writer writer = SequenceFile.createWriter(FileContext.getFileContext(job.getConfiguration()),
            job.getConfiguration(), inputPath, NullWritable.class, clazz, CompressionType.NONE, CODEC,
            new Metadata(), EnumSet.of(CreateFlag.CREATE), DUMMY_VAR_ARGS);
    doSetupInput(values, clazz, inputsPerSplit, job, inputPath, writer);
}
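A sketch of driving setupInput, assuming WritableValueInputFormat is also set as the job's input format and that the values fit comfortably in memory:

Job job = Job.getInstance(new Configuration(), "writable-values");
job.setInputFormatClass(WritableValueInputFormat.class);

// Three values with two inputs per split -> two input splits.
List<IntWritable> values = Arrays.asList(new IntWritable(1), new IntWritable(2), new IntWritable(3));
WritableValueInputFormat.setupInput(values, IntWritable.class, 2, job);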
From source file: com.conductor.hadoop.WritableValueInputFormat.java
License: Apache License

@VisibleForTesting
static <V extends Writable> void doSetupInput(final List<V> values, final Class<V> clazz,
        final int inputsPerSplit, final Job job, final Path inputPath, final Writer writer)
        throws IOException {
    job.getConfiguration().setClass(VALUE_TYPE_CONF, clazz, Writable.class);
    job.getConfiguration().setInt(INPUTS_PER_SPLIT_CONF, inputsPerSplit);
    job.getConfiguration().set(INPUT_FILE_LOCATION_CONF, inputPath.toString());

    // write each value to the sequence file
    int syncCounter = 0;
    for (final V input : values) {
        // each entry in the sequence file is a map input
        writer.append(NullWritable.get(), input);
        // syncing indicates an input split boundary
        if (++syncCounter % inputsPerSplit == 0) {
            writer.sync();
        }
    }

    // close the input file
    writer.hflush();
    writer.close();

    // delete the file when the JVM exits
    inputPath.getFileSystem(job.getConfiguration()).deleteOnExit(inputPath);
}
From source file: com.conductor.kafka.hadoop.KafkaRecordReaderTest.java
License: Apache License

@Before
public void setUp() throws Exception {
    conf = new Configuration(false);
    when(context.getConfiguration()).thenReturn(conf);

    final Job job = mock(Job.class);
    when(job.getConfiguration()).thenReturn(conf);
    KafkaInputFormat.setConsumerGroup(job, "group");
    KafkaInputFormat.setKafkaSocketTimeoutMs(job, 1000);
    KafkaInputFormat.setKafkaBufferSizeBytes(job, 4096);
    KafkaInputFormat.setKafkaFetchSizeBytes(job, 2048);

    final Broker broker = new Broker("localhost", 9092, 1);
    this.partition = new Partition("topic", 0, broker);
    split = new KafkaInputSplit(partition, 0, 100, true);

    reader = spy(new KafkaRecordReader());
    reader.initialize(split, context);
}
From source file: com.conversantmedia.mapreduce.io.avro.MultiSchemaAvroSerialization.java
License: Apache License

/**
 * Register the schemas this serializer will ser/deser to/from.
 *
 * @param job     the job to be configured
 * @param schemas list of schemas to register (will assign internal indices
 *                based on the order they're provided)
 */
public static void registerSchemas(Job job, Schema... schemas) {
    String[] names = new String[schemas.length];
    int idx = 0;
    for (Schema schema : schemas) {
        names[idx++] = schema.getFullName();
    }
    job.getConfiguration().setStrings(CONF_KEY_MULTI_SCHEMAS, names);
    registerSerialization(job);
}
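A hedged usage sketch; USER_SCHEMA and EVENT_SCHEMA stand in for real Avro Schema instances. Because indices are assigned in argument order, every job reading or writing these records must register the schemas in the same order:

MultiSchemaAvroSerialization.registerSchemas(job, USER_SCHEMA, EVENT_SCHEMA);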
From source file: com.conversantmedia.mapreduce.io.avro.MultiSchemaAvroSerialization.java
License: Apache License

/**
 * Add this class to the list of serializers.
 *
 * @param job the job for registering serialization
 */
public static void registerSerialization(Job job) {
    String[] strings = job.getConfiguration().getStrings("io.serializations");
    String[] newStrings = new String[strings.length + 1];
    System.arraycopy(strings, 0, newStrings, 0, strings.length);
    newStrings[newStrings.length - 1] = MultiSchemaAvroSerialization.class.getName();
    job.getConfiguration().setStrings("io.serializations", newStrings);
}
From source file: com.conversantmedia.mapreduce.io.CompositeSortKeySerialization.java
License: Apache License

/**
 * Convenience method to configure the job for using the composite key.
 *
 * @param job           the job using this serializer
 * @param groupKeyClass the key type used for grouping
 * @param sortKeyClass  the key type used for sorting
 */
@SuppressWarnings("rawtypes")
public static void configureMapOutputKey(Job job, Class<? extends WritableComparable> groupKeyClass,
        Class<? extends WritableComparable> sortKeyClass) {

    // First, set up our classes...
    job.getConfiguration().set(CONF_KEY_GROUPKEY_CLASS, groupKeyClass.getName());
    job.getConfiguration().set(CONF_KEY_SORTKEY_CLASS, sortKeyClass.getName());

    // Set this class as our map output key
    job.setMapOutputKeyClass(CompositeSortKey.class);

    // Set up the partitioner and comparators.
    job.setPartitionerClass(CompositeSortKey.KeyPartitioner.class);
    job.setGroupingComparatorClass(CompositeSortKey.GroupingComparator.class);
    job.setSortComparatorClass(CompositeSortKey.NaturalSortComparator.class);

    // Now set up the serialization by registering with the framework.
    Collection<String> serializations = new ArrayList<>();
    serializations.add(CompositeSortKeySerialization.class.getName());
    serializations.addAll(job.getConfiguration().getStringCollection("io.serializations"));
    job.getConfiguration().setStrings("io.serializations",
            serializations.toArray(new String[serializations.size()]));
}
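A sketch of wiring a job to group on one key while sorting on another; the key classes are illustrative:

// Group reduce input by a Text key; order values within each group by an IntWritable.
CompositeSortKeySerialization.configureMapOutputKey(job, Text.class, IntWritable.class);
// The mapper is then expected to emit CompositeSortKey instances as its output key.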
From source file: com.conversantmedia.mapreduce.io.CompositeSortKeyTest.java
License: Apache License

@Before
public void setup() {
    TestMapper mapper = new TestMapper();
    TestReducer reducer = new TestReducer();
    driver = MapReduceDriver.newMapReduceDriver(mapper, reducer);
    Configuration conf = driver.getConfiguration();

    Job job = mock(Job.class);
    when(job.getConfiguration()).thenReturn(conf);
    CompositeSortKeySerialization.configureMapOutputKey(job, Text.class, IntWritable.class);

    // MRUnit sets these differently than standard MapReduce:
    driver.setKeyGroupingComparator(new CompositeSortKey.GroupingComparator<Text, IntWritable>());
}
From source file: com.conversantmedia.mapreduce.tool.annotation.handler.AvroJobInfoAnnotationHandler.java
License: Apache License

@Override
public void process(Annotation annotation, Job job, Object target) throws ToolException {
    AvroJobInfo avroInfo = (AvroJobInfo) annotation;
    if (avroInfo.inputKeySchema() != AvroDefault.class) {
        AvroJob.setInputKeySchema(job, getSchema(avroInfo.inputKeySchema()));
    }
    if (avroInfo.inputValueSchema() != AvroDefault.class) {
        AvroJob.setInputValueSchema(job, getSchema(avroInfo.inputValueSchema()));
    }
    if (avroInfo.outputKeySchema() != AvroDefault.class) {
        AvroJob.setOutputKeySchema(job, getSchema(avroInfo.outputKeySchema()));
    }
    if (avroInfo.outputValueSchema() != AvroDefault.class) {
        AvroJob.setOutputValueSchema(job, getSchema(avroInfo.outputValueSchema()));
    }
    if (avroInfo.mapOutputKeySchema() != AvroDefault.class) {
        AvroJob.setMapOutputKeySchema(job, getSchema(avroInfo.mapOutputKeySchema()));
    }
    if (avroInfo.mapOutputValueSchema() != AvroDefault.class) {
        AvroJob.setMapOutputValueSchema(job, getSchema(avroInfo.mapOutputValueSchema()));
    }
    AvroSerialization.addToConfiguration(job.getConfiguration());
}
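The handler above delegates to avro-mapred's AvroJob helpers; configuring the same settings directly, without the annotation, might look like this (RECORD_SCHEMA is a placeholder for a real Schema instance):

AvroJob.setInputKeySchema(job, RECORD_SCHEMA);
AvroJob.setOutputKeySchema(job, RECORD_SCHEMA);
AvroSerialization.addToConfiguration(job.getConfiguration());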