List of usage examples for org.apache.hadoop.mapreduce Job getOutputFormatClass
@SuppressWarnings("unchecked") public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License:Apache License
/**
 * Creates a jar that contains everything needed for running the MapReduce program by Hadoop.
 *
 * @return a new {@link File} containing the job jar
 */
private File buildJobJar(Job job, File tempDir) throws IOException, URISyntaxException {
    File jobJar = new File(tempDir, "job.jar");
    LOG.debug("Creating Job jar: {}", jobJar);

    // For local mode, nothing is needed in the job jar since we use the classloader in the configuration object.
    if (MapReduceTaskContextProvider.isLocal(job.getConfiguration())) {
        JarOutputStream output = new JarOutputStream(new FileOutputStream(jobJar));
        output.close();
        return jobJar;
    }

    // Excludes libraries that are for sure not needed.
    // Hadoop - Available from the cluster
    // Spark - MR never uses Spark
    final HadoopClassExcluder hadoopClassExcluder = new HadoopClassExcluder();
    ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor() {
        @Override
        public boolean accept(String className, URL classUrl, URL classPathUrl) {
            if (className.startsWith("org.apache.spark") || classPathUrl.toString().contains("spark-assembly")) {
                return false;
            }
            return hadoopClassExcluder.accept(className, classUrl, classPathUrl);
        }
    });
    Set<Class<?>> classes = Sets.newHashSet();
    classes.add(MapReduce.class);
    classes.add(MapperWrapper.class);
    classes.add(ReducerWrapper.class);

    // We only need to trace the Input/OutputFormat classes due to MAPREDUCE-5957, so that those classes are
    // included in the job.jar and are available in the MR system classpath before our job classloader
    // (ApplicationClassLoader) takes over the classloading.
    if (cConf.getBoolean(Constants.AppFabric.MAPREDUCE_INCLUDE_CUSTOM_CLASSES)) {
        try {
            Class<? extends InputFormat<?, ?>> inputFormatClass = job.getInputFormatClass();
            LOG.info("InputFormat class: {} {}", inputFormatClass, inputFormatClass.getClassLoader());
            classes.add(inputFormatClass);

            // If it is StreamInputFormat, also add the StreamEventDecoder class.
            if (StreamInputFormat.class.isAssignableFrom(inputFormatClass)) {
                Class<? extends StreamEventDecoder> decoderType =
                        StreamInputFormat.getDecoderClass(job.getConfiguration());
                if (decoderType != null) {
                    classes.add(decoderType);
                }
            }
        } catch (Throwable t) {
            LOG.info("InputFormat class not found: {}", t.getMessage(), t);
            // Ignore
        }
        try {
            Class<? extends OutputFormat<?, ?>> outputFormatClass = job.getOutputFormatClass();
            LOG.info("OutputFormat class: {} {}", outputFormatClass, outputFormatClass.getClassLoader());
            classes.add(outputFormatClass);
        } catch (Throwable t) {
            LOG.info("OutputFormat class not found: {}", t.getMessage(), t);
            // Ignore
        }
    }
    // End of MAPREDUCE-5957.

    try {
        Class<?> hbaseTableUtilClass = HBaseTableUtilFactory.getHBaseTableUtilClass();
        classes.add(hbaseTableUtilClass);
    } catch (ProvisionException e) {
        LOG.warn("Not including HBaseTableUtil classes in submitted Job Jar since they are not available");
    }

    ClassLoader oldCLassLoader = ClassLoaders.setContextClassLoader(job.getConfiguration().getClassLoader());
    appBundler.createBundle(Locations.toLocation(jobJar), classes);
    ClassLoaders.setContextClassLoader(oldCLassLoader);

    LOG.info("Built MapReduce Job Jar at {}", jobJar.toURI());
    return jobJar;
}
From source file:co.nubetech.hiho.dedup.DedupJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }

    Job job = new Job(conf);
    job.setJobName("Dedup job");
    job.setJarByClass(DedupJob.class);

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    if (dedupBy.equals("key")) {
        job.setMapperClass(DedupKeyMapper.class);
        job.setReducerClass(DedupKeyReducer.class);
        job.setMapOutputValueClass(inputValueClass);
    } else if (dedupBy.equals("value")) {
        job.setMapperClass(DedupValueMapper.class);
        job.setReducerClass(DedupValueReducer.class);
        job.setMapOutputValueClass(inputKeyClass);
    }

    job.setInputFormatClass(inputFormatClass);
    if (inputFormat.equals("co.nubetech.hiho.dedup.DelimitedTextInputFormat")) {
        DelimitedTextInputFormat.setProperties(job, delimiter, column);
    }
    job.setMapOutputKeyClass(HihoTuple.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    job.setPartitionerClass(HihoHashPartitioner.class);

    FileInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsRead = counters.findCounter(DedupRecordCounter.TOTAL_RECORDS_READ).getValue();
            badRecords = counters.findCounter(DedupRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(DedupRecordCounter.OUTPUT).getValue();
            duplicateRecords = totalRecordsRead - output;
            logger.info("Total records read are: " + totalRecordsRead);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
            logger.info("Duplicate records are: " + duplicateRecords);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}
From source file:co.nubetech.hiho.job.DBQueryInputJob.java
License:Apache License
public void runJobs(Configuration conf, int jobCounter) throws IOException {
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    Job job = new Job(conf);
    for (Entry<String, String> entry : conf) {
        logger.warn("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    // logger.debug("Number of maps " + conf.getInt("mapred.map.tasks", 1));
    // conf.setInt(JobContext.NUM_MAPS, conf.getInt("mapreduce.job.maps", 1));
    // job.getConfiguration().setInt("mapred.map.tasks", 4);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    logger.warn("Number of maps " + conf.getInt(MRJobConfig.NUM_MAPS, 1));

    job.setJobName("Import job");
    job.setJarByClass(DBQueryInputJob.class);

    String strategy = conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY);
    OutputStrategyEnum os = OutputStrategyEnum.value(strategy);
    if (os == null) {
        throw new IllegalArgumentException("Wrong value of output strategy. Please correct");
    }
    if (os != OutputStrategyEnum.AVRO) {
        switch (os) {
        case DUMP: {
            // job.setMapperClass(DBImportMapper.class);
            break;
        }
        /*
         * case AVRO: {
         *     job.setMapperClass(DBInputAvroMapper.class);
         *     // need avro in cp
         *     // job.setJarByClass(Schema.class);
         *     // need jackson which is needed by avro - ugly!
         *     // job.setJarByClass(ObjectMapper.class);
         *     job.setMapOutputKeyClass(NullWritable.class);
         *     job.setMapOutputValueClass(AvroValue.class);
         *     job.setOutputKeyClass(NullWritable.class);
         *     job.setOutputValueClass(AvroValue.class);
         *     job.setOutputFormatClass(AvroOutputFormat.class);
         *
         *     AvroOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
         *     break;
         * }
         */
        case DELIMITED: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);
            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
        }
        // note: DELIMITED has no break and falls through into the JSON case
        case JSON: {
            // job.setMapperClass(DBImportJsonMapper.class);
            // job.setJarByClass(ObjectMapper.class);
            break;
        }
        default: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);
            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
            break;
        }
        }

        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        job.setNumReduceTasks(0);

        try {
            // job.setJarByClass(Class.forName(conf.get(
            //     org.apache.hadoop.mapred.lib.db.DBConfiguration.DRIVER_CLASS_PROPERTY)));
            logger.debug("OUTPUT format class is " + job.getOutputFormatClass());

            /*
             * org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
             *     ReflectionUtils.newInstance(job.getOutputFormatClass(), job.getConfiguration());
             * output.checkOutputSpecs(job);
             */
            logger.debug("Class is " + ReflectionUtils
                    .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
            job.waitForCompletion(false);
            if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) {
                generateHiveScript(conf, job, jobCounter);
                generatePigScript(conf, job);
            }
        }
        /*
         * catch (HIHOException h) {
         *     h.printStackTrace();
         * }
         */
        // the more specific HIHOException must be caught before Exception
        catch (HIHOException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // avro to be handled differently, thanks to all the incompatibilities in the apis.
    else {
        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        // co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");
        JobConf jobConf = new JobConf(conf);
        try {
            GenericDBWritable queryWritable = getDBWritable(jobConf);
            Schema pair = DBMapper.getPairSchema(queryWritable.getColumns());
            AvroJob.setMapOutputSchema(jobConf, pair);
            GenericRecordAvroOutputFormat.setOutputPath(jobConf,
                    new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
            co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(jobConf, inputQuery,
                    inputBoundingQuery, params);
            jobConf.setInputFormat(co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.class);
            jobConf.setMapperClass(DBInputAvroMapper.class);
            jobConf.setMapOutputKeyClass(NullWritable.class);
            jobConf.setMapOutputValueClass(AvroValue.class);
            jobConf.setOutputKeyClass(NullWritable.class);
            jobConf.setOutputValueClass(Text.class);
            jobConf.setOutputFormat(GenericRecordAvroOutputFormat.class);
            jobConf.setJarByClass(DBQueryInputJob.class);
            jobConf.setStrings("io.serializations",
                    "org.apache.hadoop.io.serializer.JavaSerialization,"
                            + "org.apache.hadoop.io.serializer.WritableSerialization,"
                            + "org.apache.avro.mapred.AvroSerialization");
            jobConf.setNumReduceTasks(0);
            /*
             * jobConf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
             * org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(jobConf,
             *     new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
             */
            JobClient.runJob(jobConf);
        } catch (Throwable e) {
            e.printStackTrace();
        }
    }
}
From source file:co.nubetech.hiho.merge.MergeJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    Configuration conf = getConf();
    conf.set(HIHOConf.MERGE_OLD_PATH, oldPath);
    conf.set(HIHOConf.MERGE_NEW_PATH, newPath);

    Job job = new Job(conf);
    job.setJobName("Merge job");
    job.setJarByClass(MergeJob.class);

    if (mergeBy.equals("key")) {
        job.setMapperClass(MergeKeyMapper.class);
        job.setReducerClass(MergeKeyReducer.class);
    } else if (mergeBy.equals("value")) {
        job.setMapperClass(MergeValueMapper.class);
        job.setReducerClass(MergeValueReducer.class);
    }

    job.setInputFormatClass(inputFormatClass);
    DelimitedTextInputFormat.setProperties(job, delimiter, column);
    job.setMapOutputKeyClass(HihoTuple.class);
    job.setMapOutputValueClass(HihoValue.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);

    FileInputFormat.setInputPaths(job, oldPath + "," + newPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsOld = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_OLD).getValue();
            totalRecordsNew = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue();
            badRecords = counters.findCounter(MergeRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(MergeRecordCounter.OUTPUT).getValue();
            logger.info("Total old records read are: " + totalRecordsOld);
            logger.info("Total new records read are: " + totalRecordsNew);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
private void runJob(Job job) throws ClassNotFoundException, IOException, InterruptedException {
    assert job.getJobID() != null;
    TaskID taskId = newMapTaskId(job.getJobID(), 0);
    Configuration conf = job.getConfiguration();
    OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
    OutputCommitter committer = output
            .getOutputCommitter(newTaskAttemptContext(conf, newTaskAttemptId(taskId, 0)));
    boolean succeed = false;
    committer.setupJob(job);
    try {
        if (job.getNumReduceTasks() == 0) {
            runMap(job, null);
        } else {
            try (KeyValueSorter<?, ?> sorter = createSorter(job,
                    job.getMapOutputKeyClass(), job.getMapOutputValueClass())) {
                runMap(job, sorter);
                runReduce(job, sorter);
            }
        }
        committer.commitJob(job);
        succeed = true;
    } finally {
        if (succeed == false) {
            try {
                committer.abortJob(job, State.FAILED);
            } catch (IOException e) {
                LOG.error(MessageFormat.format("error occurred while aborting job: {0} ({1})",
                        job.getJobID(), job.getJobName()), e);
            }
        }
    }
}
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) private void runMap(Job job, KeyValueSorter<?, ?> sorter) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = job.getConfiguration(); InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf); List<InputSplit> splits = input.getSplits(job); int serial = 1; for (InputSplit split : splits) { TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0); Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$ mapper.getClass().getName(), id, split.getLength())); }//from w w w. j a v a 2s .co m TaskAttemptContext context = newTaskAttemptContext(conf, id); // we always obtain a new OutputFormat object / OutputFormat.getOutputCommiter() may be cached OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf); OutputCommitter committer = output.getOutputCommitter(context); committer.setupTask(context); boolean succeed = false; try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) { RecordWriter<?, ?> writer; if (sorter != null) { writer = new ShuffleWriter(sorter); } else { writer = output.getRecordWriter(newTaskAttemptContext(conf, id)); } try { Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split); reader.initialize(split, c); mapper.run(c); } finally { writer.close(newTaskAttemptContext(conf, id)); } doCommitTask(context, committer); succeed = true; } finally { if (succeed == false) { doAbortTask(context, committer); } } } }
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) private void runReduce(Job job, KeyValueSorter<?, ?> sorter) throws ClassNotFoundException, IOException, InterruptedException { Configuration conf = job.getConfiguration(); OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf); TaskAttemptID id = newTaskAttemptId(newReduceTaskId(job.getJobID(), 1), 0); Reducer<?, ?, ?, ?> reducer = ReflectionUtils.newInstance(job.getReducerClass(), conf); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("starting reducer: {0}@{1} ({2}records, {3}bytes)", //$NON-NLS-1$ reducer.getClass().getName(), id, sorter.getRecordCount(), sorter.getSizeInBytes())); }//from w w w . j a va 2 s.c o m TaskAttemptContext context = newTaskAttemptContext(conf, id); OutputCommitter committer = output.getOutputCommitter(context); committer.setupTask(context); boolean succeed = false; try { ShuffleReader reader = new ShuffleReader(sorter, new Progress()); try { RecordWriter<?, ?> writer = output.getRecordWriter(newTaskAttemptContext(conf, id)); try { Reducer.Context c = newReducerContext(conf, id, reader, sorter.getKeyClass(), sorter.getValueClass(), writer, committer, (RawComparator) job.getGroupingComparator()); reducer.run(c); } finally { writer.close(newTaskAttemptContext(conf, id)); } } finally { try { reader.close(); } catch (IOException e) { LOG.warn(MessageFormat.format("error occurred while reducer mapper input: {0} ({1})", id, job.getJobName()), e); } } doCommitTask(context, committer); succeed = true; } finally { if (succeed == false) { doAbortTask(context, committer); } } }
From source file:com.baynote.kafka.hadoop.KafkaJobBuilderTest.java
License:Apache License
@Test
public void testConfigureWholeJob() throws Exception {
    // base configuration
    builder.setZkConnect("localhost:2181");
    builder.addQueueInput("queue_name", "group_name", MockMapper.class);
    builder.setTextFileOutputFormat("/a/hdfs/path");

    // extended configuration
    builder.setJobName("job_name");
    builder.setMapOutputKeyClass(Text.class);
    builder.setMapOutputValueClass(BytesWritable.class);
    builder.setReducerClass(MockReducer.class);
    builder.setTaskMemorySettings("-Xmx2048m");
    builder.setNumReduceTasks(100);
    builder.setParitioner(MockPartitioner.class);
    builder.setKafkaFetchSizeBytes(1024);

    Job job = builder.configureJob(conf);

    assertEquals("job_name", job.getJobName());
    assertEquals(Text.class, job.getMapOutputKeyClass());
    assertEquals(BytesWritable.class, job.getMapOutputValueClass());
    assertEquals(MockReducer.class, job.getReducerClass());
    assertEquals(MockMapper.class, job.getMapperClass());
    assertEquals("-Xmx2048m", job.getConfiguration().get("mapred.child.java.opts"));
    assertEquals(100, job.getNumReduceTasks());
    assertEquals(MockPartitioner.class, job.getPartitionerClass());
    assertEquals(1024, KafkaInputFormat.getKafkaFetchSizeBytes(job.getConfiguration()));
    assertEquals(TextOutputFormat.class, job.getOutputFormatClass());
    assertEquals(KafkaInputFormat.class, job.getInputFormatClass());
    assertEquals("file:/a/hdfs/path", TextOutputFormat.getOutputPath(job).toString());

    builder.setJobName(null);
    builder.setSequenceFileOutputFormat();
    builder.setUseLazyOutput();
    builder.addQueueInput("queue_name_2", "group_name_2", MockMapper.class);

    job = builder.configureJob(conf);

    assertEquals(LazyOutputFormat.class, job.getOutputFormatClass());
    assertEquals(MultipleKafkaInputFormat.class, job.getInputFormatClass());
    assertEquals(DelegatingMapper.class, job.getMapperClass());
    assertEquals(BytesWritable.class, job.getOutputKeyClass());
    assertEquals(BytesWritable.class, job.getOutputValueClass());
    assertNotNull(SequenceFileOutputFormat.getOutputPath(job));
    assertNotNull(job.getJobName());

    // use s3
    builder.useS3("my_aws_key", "s3cr3t", "my-bucket");
    builder.setTextFileOutputFormat("/a/hdfs/path");

    job = builder.configureJob(conf);

    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3n.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3n.awsSecretAccessKey"));
    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3.awsSecretAccessKey"));
}
From source file:com.cloudera.castagna.logparser.Utils.java
License:Apache License
public static void log(Job job, Logger log) throws ClassNotFoundException {
    log.debug("{} -> {} ({}, {}) -> {}#{} ({}, {}) -> {}",
            new Object[] { job.getInputFormatClass().getSimpleName(),
                    job.getMapperClass().getSimpleName(),
                    job.getMapOutputKeyClass().getSimpleName(),
                    job.getMapOutputValueClass().getSimpleName(),
                    job.getReducerClass().getSimpleName(), job.getNumReduceTasks(),
                    job.getOutputKeyClass().getSimpleName(),
                    job.getOutputValueClass().getSimpleName(),
                    job.getOutputFormatClass().getSimpleName() });
    Path[] inputs = FileInputFormat.getInputPaths(job);
    Path output = FileOutputFormat.getOutputPath(job);
    log.debug("input: {}", inputs[0]);
    log.debug("output: {}", output);
}
From source file:com.example.Driver.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");
    job.setJarByClass(Driver.class);

    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("to run this jar at least 2 parameters are necessary: " + job.getJar()
                + " input_files output_directory");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    // job.setMapOutputKeyClass(Text.class);
    // job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());
    // When you are not running any Reducer: job.setNumReduceTasks(0);
    logger.info("number of reduce tasks is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());
    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);
    return 0;
}