List of usage examples for org.apache.hadoop.mapreduce Job getInputFormatClass
@SuppressWarnings("unchecked") public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException
From source file:ComRoughSetApproInputSampler.java
License:Apache License
/** * Write a partition file for the given job, using the Sampler provided. * Queries the sampler for a sample keyset, sorts by the output key * comparator, selects the keys for each rank, and writes to the destination * returned from {@link TotalOrderPartitioner#getPartitionFile}. *//*from ww w. j a va 2 s . c o m*/ @SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf = job.getConfiguration(); final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf); int numPartitions = job.getNumReduceTasks(); K[] samples = (K[]) sampler.getSample(inf, job); LOG.info("Using " + samples.length + " samples"); RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator(); Arrays.sort(samples, comparator); Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf)); FileSystem fs = dst.getFileSystem(conf); if (fs.exists(dst)) { fs.delete(dst, false); } SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class); NullWritable nullValue = NullWritable.get(); float stepSize = samples.length / (float) numPartitions; int last = -1; for (int i = 1; i < numPartitions; ++i) { int k = Math.round(stepSize * i); while (last >= k && comparator.compare(samples[last], samples[k]) == 0) { ++k; } writer.append(samples[k], nullValue); last = k; } writer.close(); }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.MultipleInputsTest.java
License:Apache License
@Test public void testConfigurations() throws IOException, ClassNotFoundException { Job job = Job.getInstance(); String inputName1 = "inputName1"; String inputFormatClass1 = TextInputFormat.class.getName(); Map<String, String> inputFormatConfigs1 = ImmutableMap.of("key1", "val1", "key2", "val2"); MultipleInputs.addInput(job, inputName1, inputFormatClass1, inputFormatConfigs1, job.getMapperClass()); Map<String, MultipleInputs.MapperInput> map = MultipleInputs.getInputMap(job.getConfiguration()); Assert.assertEquals(1, map.size());/*from ww w .j a va 2 s. c om*/ Assert.assertEquals(inputName1, Iterables.getOnlyElement(map.keySet())); Assert.assertEquals(inputFormatClass1, Iterables.getOnlyElement(map.values()).getInputFormatClassName()); Assert.assertEquals(inputFormatConfigs1, Iterables.getOnlyElement(map.values()).getInputFormatConfiguration()); Assert.assertEquals(job.getMapperClass().getName(), Iterables.getOnlyElement(map.values()).getMapperClassName()); Assert.assertEquals(DelegatingInputFormat.class, job.getInputFormatClass()); // now, test with two inputs in the configuration String inputName2 = "inputName2"; String inputFormatClass2 = TextInputFormat.class.getName(); Map<String, String> inputFormatConfigs2 = ImmutableMap.of("some_key1", "some_val1", "some_key2", "some_val2"); MultipleInputs.addInput(job, inputName2, inputFormatClass2, inputFormatConfigs2, CustomMapper.class); map = MultipleInputs.getInputMap(job.getConfiguration()); Assert.assertEquals(2, map.size()); MultipleInputs.MapperInput mapperInput1 = map.get(inputName1); Assert.assertEquals(inputFormatClass1, mapperInput1.getInputFormatClassName()); Assert.assertEquals(inputFormatConfigs1, mapperInput1.getInputFormatConfiguration()); Assert.assertEquals(job.getMapperClass().getName(), mapperInput1.getMapperClassName()); MultipleInputs.MapperInput mapperInput2 = map.get(inputName2); Assert.assertEquals(inputFormatClass2, mapperInput2.getInputFormatClassName()); Assert.assertEquals(inputFormatConfigs2, mapperInput2.getInputFormatConfiguration()); Assert.assertEquals(CustomMapper.class, job.getConfiguration().getClassByName(mapperInput2.getMapperClassName())); }
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License:Apache License
/** * Creates a jar that contains everything that are needed for running the MapReduce program by Hadoop. * * @return a new {@link File} containing the job jar */// w w w.j a va 2 s.c o m private File buildJobJar(Job job, File tempDir) throws IOException, URISyntaxException { File jobJar = new File(tempDir, "job.jar"); LOG.debug("Creating Job jar: {}", jobJar); // For local mode, nothing is needed in the job jar since we use the classloader in the configuration object. if (MapReduceTaskContextProvider.isLocal(job.getConfiguration())) { JarOutputStream output = new JarOutputStream(new FileOutputStream(jobJar)); output.close(); return jobJar; } // Excludes libraries that are for sure not needed. // Hadoop - Available from the cluster // Spark - MR never uses Spark final HadoopClassExcluder hadoopClassExcluder = new HadoopClassExcluder(); ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor() { @Override public boolean accept(String className, URL classUrl, URL classPathUrl) { if (className.startsWith("org.apache.spark") || classPathUrl.toString().contains("spark-assembly")) { return false; } return hadoopClassExcluder.accept(className, classUrl, classPathUrl); } }); Set<Class<?>> classes = Sets.newHashSet(); classes.add(MapReduce.class); classes.add(MapperWrapper.class); classes.add(ReducerWrapper.class); // We only need to trace the Input/OutputFormat class due to MAPREDUCE-5957 so that those classes are included // in the job.jar and be available in the MR system classpath before our job classloader (ApplicationClassLoader) // take over the classloading. if (cConf.getBoolean(Constants.AppFabric.MAPREDUCE_INCLUDE_CUSTOM_CLASSES)) { try { Class<? extends InputFormat<?, ?>> inputFormatClass = job.getInputFormatClass(); LOG.info("InputFormat class: {} {}", inputFormatClass, inputFormatClass.getClassLoader()); classes.add(inputFormatClass); // If it is StreamInputFormat, also add the StreamEventCodec class as well. if (StreamInputFormat.class.isAssignableFrom(inputFormatClass)) { Class<? extends StreamEventDecoder> decoderType = StreamInputFormat .getDecoderClass(job.getConfiguration()); if (decoderType != null) { classes.add(decoderType); } } } catch (Throwable t) { LOG.info("InputFormat class not found: {}", t.getMessage(), t); // Ignore } try { Class<? extends OutputFormat<?, ?>> outputFormatClass = job.getOutputFormatClass(); LOG.info("OutputFormat class: {} {}", outputFormatClass, outputFormatClass.getClassLoader()); classes.add(outputFormatClass); } catch (Throwable t) { LOG.info("OutputFormat class not found: {}", t.getMessage(), t); // Ignore } } // End of MAPREDUCE-5957. try { Class<?> hbaseTableUtilClass = HBaseTableUtilFactory.getHBaseTableUtilClass(); classes.add(hbaseTableUtilClass); } catch (ProvisionException e) { LOG.warn("Not including HBaseTableUtil classes in submitted Job Jar since they are not available"); } ClassLoader oldCLassLoader = ClassLoaders.setContextClassLoader(job.getConfiguration().getClassLoader()); appBundler.createBundle(Locations.toLocation(jobJar), classes); ClassLoaders.setContextClassLoader(oldCLassLoader); LOG.info("Built MapReduce Job Jar at {}", jobJar.toURI()); return jobJar; }
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) private void runMap(Job job, KeyValueSorter<?, ?> sorter) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = job.getConfiguration(); InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf); List<InputSplit> splits = input.getSplits(job); int serial = 1; for (InputSplit split : splits) { TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0); Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$ mapper.getClass().getName(), id, split.getLength())); }/* w ww . j ava 2 s . c om*/ TaskAttemptContext context = newTaskAttemptContext(conf, id); // we always obtain a new OutputFormat object / OutputFormat.getOutputCommiter() may be cached OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf); OutputCommitter committer = output.getOutputCommitter(context); committer.setupTask(context); boolean succeed = false; try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) { RecordWriter<?, ?> writer; if (sorter != null) { writer = new ShuffleWriter(sorter); } else { writer = output.getRecordWriter(newTaskAttemptContext(conf, id)); } try { Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split); reader.initialize(split, c); mapper.run(c); } finally { writer.close(newTaskAttemptContext(conf, id)); } doCommitTask(context, committer); succeed = true; } finally { if (succeed == false) { doAbortTask(context, committer); } } } }
From source file:com.baynote.kafka.hadoop.KafkaJobBuilderTest.java
License:Apache License
@Test public void testConfigureWholeJob() throws Exception { // base configuration builder.setZkConnect("localhost:2181"); builder.addQueueInput("queue_name", "group_name", MockMapper.class); builder.setTextFileOutputFormat("/a/hdfs/path"); // extended configuration builder.setJobName("job_name"); builder.setMapOutputKeyClass(Text.class); builder.setMapOutputValueClass(BytesWritable.class); builder.setReducerClass(MockReducer.class); builder.setTaskMemorySettings("-Xmx2048m"); builder.setNumReduceTasks(100);// ww w . ja v a 2 s . c om builder.setParitioner(MockPartitioner.class); builder.setKafkaFetchSizeBytes(1024); Job job = builder.configureJob(conf); assertEquals("job_name", job.getJobName()); assertEquals(Text.class, job.getMapOutputKeyClass()); assertEquals(BytesWritable.class, job.getMapOutputValueClass()); assertEquals(MockReducer.class, job.getReducerClass()); assertEquals(MockMapper.class, job.getMapperClass()); assertEquals("-Xmx2048m", job.getConfiguration().get("mapred.child.java.opts")); assertEquals(100, job.getNumReduceTasks()); assertEquals(MockPartitioner.class, job.getPartitionerClass()); assertEquals(1024, KafkaInputFormat.getKafkaFetchSizeBytes(job.getConfiguration())); assertEquals(TextOutputFormat.class, job.getOutputFormatClass()); assertEquals(KafkaInputFormat.class, job.getInputFormatClass()); assertEquals("file:/a/hdfs/path", TextOutputFormat.getOutputPath(job).toString()); builder.setJobName(null); builder.setSequenceFileOutputFormat(); builder.setUseLazyOutput(); builder.addQueueInput("queue_name_2", "group_name_2", MockMapper.class); job = builder.configureJob(conf); assertEquals(LazyOutputFormat.class, job.getOutputFormatClass()); assertEquals(MultipleKafkaInputFormat.class, job.getInputFormatClass()); assertEquals(DelegatingMapper.class, job.getMapperClass()); assertEquals(BytesWritable.class, job.getOutputKeyClass()); assertEquals(BytesWritable.class, job.getOutputValueClass()); assertNotNull(SequenceFileOutputFormat.getOutputPath(job)); assertNotNull(job.getJobName()); // use s3 builder.useS3("my_aws_key", "s3cr3t", "my-bucket"); builder.setTextFileOutputFormat("/a/hdfs/path"); job = builder.configureJob(conf); assertEquals("my_aws_key", job.getConfiguration().get("fs.s3n.awsAccessKeyId")); assertEquals("s3cr3t", job.getConfiguration().get("fs.s3n.awsSecretAccessKey")); assertEquals("my_aws_key", job.getConfiguration().get("fs.s3.awsAccessKeyId")); assertEquals("s3cr3t", job.getConfiguration().get("fs.s3.awsSecretAccessKey")); }
From source file:com.cloudera.castagna.logparser.Utils.java
License:Apache License
public static void log(Job job, Logger log) throws ClassNotFoundException { log.debug("{} -> {} ({}, {}) -> {}#{} ({}, {}) -> {}", new Object[] { job.getInputFormatClass().getSimpleName(), job.getMapperClass().getSimpleName(), job.getMapOutputKeyClass().getSimpleName(), job.getMapOutputValueClass().getSimpleName(), job.getReducerClass().getSimpleName(), job.getNumReduceTasks(), job.getOutputKeyClass().getSimpleName(), job.getOutputValueClass().getSimpleName(), job.getOutputFormatClass().getSimpleName() }); Path[] inputs = FileInputFormat.getInputPaths(job); Path output = FileOutputFormat.getOutputPath(job); log.debug("input: {}", inputs[0]); log.debug("output: {}", output); }
From source file:com.example.Driver.java
License:Open Source License
public int run(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Your job name"); job.setJarByClass(Driver.class); logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: " + Arrays.toString(args)); if (args.length < 2) { logger.warn("to run this jar are necessary at 2 parameters \"" + job.getJar() + " input_files output_directory"); return 1; }//from ww w . ja va 2 s .c o m job.setMapperClass(WordcountMapper.class); logger.info("mapper class is " + job.getMapperClass()); //job.setMapOutputKeyClass(Text.class); //job.setMapOutputValueClass(IntWritable.class); logger.info("mapper output key class is " + job.getMapOutputKeyClass()); logger.info("mapper output value class is " + job.getMapOutputValueClass()); job.setReducerClass(WordcountReducer.class); logger.info("reducer class is " + job.getReducerClass()); job.setCombinerClass(WordcountReducer.class); logger.info("combiner class is " + job.getCombinerClass()); //When you are not runnign any Reducer //OR job.setNumReduceTasks(0); // logger.info("number of reduce task is " + job.getNumReduceTasks()); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); logger.info("output key class is " + job.getOutputKeyClass()); logger.info("output value class is " + job.getOutputValueClass()); job.setInputFormatClass(TextInputFormat.class); logger.info("input format class is " + job.getInputFormatClass()); job.setOutputFormatClass(TextOutputFormat.class); logger.info("output format class is " + job.getOutputFormatClass()); Path filePath = new Path(args[0]); logger.info("input path " + filePath); FileInputFormat.setInputPaths(job, filePath); Path outputPath = new Path(args[1]); logger.info("output path " + outputPath); FileOutputFormat.setOutputPath(job, outputPath); job.waitForCompletion(true); return 0; }
From source file:com.moz.fiji.mapreduce.TestFijiBulkImportJobBuilder.java
License:Apache License
@Test public void testBuildWithHFileOutput() throws Exception { final FijiMapReduceJob mrjob = FijiBulkImportJobBuilder.create().withConf(getConf()) .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(mTempPath, "input"))) .withBulkImporter(NoopBulkImporter.class).withOutput(MapReduceJobOutputs .newHFileMapReduceJobOutput(mTable.getURI(), new Path(mTempPath, "output"), 10)) .build();//ww w . j a v a2 s . c om final Job job = mrjob.getHadoopJob(); assertEquals(TextInputFormat.class, job.getInputFormatClass()); assertEquals(BulkImportMapper.class, job.getMapperClass()); assertEquals(NoopBulkImporter.class, job.getConfiguration().getClass(FijiConfKeys.FIJI_BULK_IMPORTER_CLASS, null)); assertEquals(IdentityReducer.class, job.getReducerClass()); assertEquals(10, job.getNumReduceTasks()); assertEquals(FijiHFileOutputFormat.class, job.getOutputFormatClass()); assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass()); }
From source file:com.moz.fiji.mapreduce.TestFijiBulkImportJobBuilder.java
License:Apache License
@Test public void testBuildWithKeyValueStore() throws Exception { final FijiMapReduceJob mrjob = FijiBulkImportJobBuilder.create().withConf(getConf()) .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(mTempPath, "input"))) .withBulkImporter(KVStoreBulkImporter.class).withOutput(MapReduceJobOutputs .newHFileMapReduceJobOutput(mTable.getURI(), new Path(mTempPath, "output"), 10)) .build();/* w w w.j av a2 s.c o m*/ final Job job = mrjob.getHadoopJob(); // Verify that everything else is what we expected as in the previous test // (except the bulk importer class name)... assertEquals(TextInputFormat.class, job.getInputFormatClass()); assertEquals(BulkImportMapper.class, job.getMapperClass()); assertEquals(KVStoreBulkImporter.class, job.getConfiguration().getClass(FijiConfKeys.FIJI_BULK_IMPORTER_CLASS, null)); assertEquals(IdentityReducer.class, job.getReducerClass()); assertEquals(10, job.getNumReduceTasks()); assertEquals(FijiHFileOutputFormat.class, job.getOutputFormatClass()); assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass()); // KeyValueStore-specific checks here. final Configuration confOut = job.getConfiguration(); assertEquals(1, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0)); assertEquals(EmptyKeyValueStore.class.getName(), confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0." + KeyValueStoreConfigSerializer.CONF_CLASS)); assertEquals("foostore", confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0." + KeyValueStoreConfigSerializer.CONF_NAME)); }
From source file:com.moz.fiji.mapreduce.TestFijiMapReduceJobBuilder.java
License:Apache License
@Test public void testBuild() throws Exception { final FijiMapReduceJob job = FijiMapReduceJobBuilder.create().withConf(mConf) .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path("/path/to/my/input"))) .withMapper(MyMapper.class).withReducer(MyReducer.class) .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("/path/to/my/output"), 16)) .build();//from w w w . j ava 2 s.c om final Job hadoopJob = job.getHadoopJob(); assertEquals(TextInputFormat.class, hadoopJob.getInputFormatClass()); assertEquals(MyMapper.class, hadoopJob.getMapperClass()); assertEquals(MyReducer.class, hadoopJob.getReducerClass()); assertEquals(16, hadoopJob.getNumReduceTasks()); assertEquals(TextOutputFormat.class, hadoopJob.getOutputFormatClass()); // KeyValueStore-specific checks here. Configuration confOut = hadoopJob.getConfiguration(); assertEquals(2, confOut.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0)); assertEquals(EmptyKeyValueStore.class.getName(), confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0." + KeyValueStoreConfigSerializer.CONF_CLASS)); assertEquals("mapperMap", confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0." + KeyValueStoreConfigSerializer.CONF_NAME)); assertEquals(EmptyKeyValueStore.class.getName(), confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1." + KeyValueStoreConfigSerializer.CONF_CLASS)); assertEquals("reducerMap", confOut.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "1." + KeyValueStoreConfigSerializer.CONF_NAME)); }