List of usage examples for the org.apache.hadoop.mapreduce.Job method setReducerClass
public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException
From source file:com.cloudera.traffic.AveragerRunner.java
License:Apache License
/**
 * Configures and runs the MapReduce job built from {@code AveragerMapper} and
 * {@code AveragerReducer} (the reducer is also registered as the combiner).
 *
 * <p>Generic Hadoop options are consumed by {@link GenericOptionsParser}; the first two
 * remaining arguments are used as the input path and the output path, in that order.
 * The process exits with status 2 when those two paths are missing and with status 1
 * when the job reports failure.
 *
 * @param args generic Hadoop options followed by {@code <input path> <output path>}
 * @throws IOException if the job cannot be created or submitted
 * @throws ClassNotFoundException if a job class cannot be resolved while the job runs
 * @throws InterruptedException if the thread is interrupted while waiting for completion
 */
public static void main(String[] args)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (otherArgs.length < 2) {
        System.err.println("Usage: AveragerRunner [generic options] <input path> <output path>");
        System.exit(2);
    }

    Job job = new Job(conf);
    job.setJarByClass(AveragerRunner.class);

    job.setMapperClass(AveragerMapper.class);
    job.setReducerClass(AveragerReducer.class);
    job.setCombinerClass(AveragerReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(AverageWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Propagate job failure to the caller; previously a failed job still exited with 0.
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }
}
From source file:com.cloudy.mapred.base.JobUtil.java
License:Apache License
/** * Create a map and reduce Hadoop job. Does not set the name on the job. * @param inputPath The input {@link org.apache.hadoop.fs.Path} * @param outputPath The output {@link org.apache.hadoop.fs.Path} * @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat} * @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class. If the Mapper is a no-op, * this value may be null * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class. If the Mapper is a no-op, * this value may be null * @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use * @param reducerKey The reducer key class. * @param reducerValue The reducer value class. * @param outputFormat The {@link org.apache.hadoop.mapreduce.OutputFormat}. * @param conf The {@link org.apache.hadoop.conf.Configuration} to use. * @return The {@link org.apache.hadoop.mapreduce.Job}. * @throws IOException if there is a problem with the IO. * * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class) * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class, * org.apache.hadoop.conf.Configuration) *///w w w. j a va 2 s. c o m public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue, Class<? 
extends OutputFormat> outputFormat, Configuration conf) throws IOException { Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); if (reducer.equals(Reducer.class)) { if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); } job.setJarByClass(mapper); } else { job.setJarByClass(reducer); } job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); if (mapperKey != null) { job.setMapOutputKeyClass(mapperKey); } if (mapperValue != null) { job.setMapOutputValueClass(mapperValue); } // jobConf.setBoolean("mapred.compress.map.output", true); job.setReducerClass(reducer); job.setOutputKeyClass(reducerKey); job.setOutputValueClass(reducerValue); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }
From source file:com.conversantmedia.mapreduce.example.WordCount.java
License:Apache License
/**
 * Configures and runs the "WordCount v2" job using {@code WordCountMapper} and
 * {@code WordCountReducer} (the reducer also serves as the combiner).
 *
 * <p>The process exits with status 2 when the two path arguments are missing and with
 * status 1 when the job fails or cannot be run.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 */
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(2);
    }

    boolean succeeded = false;
    try {
        Job job = Job.getInstance(new Configuration(), "WordCount v2");

        // FileInputFormat / FileOutputFormat are abstract and cannot be instantiated by the
        // framework; use the concrete text formats (fully qualified so that no new import
        // is required in this file).
        job.setInputFormatClass(org.apache.hadoop.mapreduce.lib.input.TextInputFormat.class);
        job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class);

        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setCombinerClass(WordCountReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        succeeded = job.waitForCompletion(true);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up can observe the interruption.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    } catch (IOException | ClassNotFoundException e) {
        e.printStackTrace();
    }

    // Report failure through the exit status instead of always exiting with 0.
    if (!succeeded) {
        System.exit(1);
    }
}
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.ReducerInfoHandler.java
License:Apache License
@Override @SuppressWarnings({ "rawtypes" }) public void process(Annotation annotation, Job job, Object target) { ReducerInfo reduce = (ReducerInfo) annotation; if (reduce != null && reduce.value() != org.apache.hadoop.mapreduce.Reducer.class) { Class<? extends Reducer> reducerClass = reduce.value(); job.setReducerClass(reducerClass); // shouldn't use defaults, but there so that we can keep the option to // set as a property of the JobInfo annotation instead of standalone configureOutputKeyValue(job, reducerClass, reduce); }//from w ww .j ava 2s .co m }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test operates on a single file/*w w w . ja v a2s. c o m*/ * * Expected result: success * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testSingle() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testSingle() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "zip-01.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Single")); // assertTrue(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test operates on a Path containing files that will cause the Job to fail * //from w w w . java 2 s. c o m * Expected result: failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testMultiple() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testMultiple() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, inputPath); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Multiple")); // assertFalse(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test is identical to testMultiple() however the ZipFileInputFormat is set to * be lenient, errors that cause testMultiple() to fail will be quietly ignored here. * /*from ww w . j a v a 2 s . c om*/ * Expected result: success * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testMultipleLenient() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testMultipleLenient() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setLenient(true); ZipFileInputFormat.setInputPaths(job, inputPath); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_MultipleLenient")); // assertTrue(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * ZipInputStream doesn't support encrypted entries thus this will fail. * /* ww w .j a v a 2s. c o m*/ * Expected result: failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testEncryptedZip() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testEncryptedZip() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "encrypted.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Encrypted")); // assertFalse(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test explicitly tries to read a file containing random noise as a ZIP file, * the expected result is a quiet failure. The Job shouldn't fail if non-ZIP data is * encountered.//from www . ja v a 2 s.c o m * * Expected result: (quiet) failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testNonZipData() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testNonZipData() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "random.dat")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_NonZipData")); // assertTrue(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test refers to a corrupt (truncated) ZIP file, upon reaching the corruption * the Job will fail and no output will be written through the Reducer. * /*w ww. ja va 2 s .co m*/ * Expected result: failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testCorruptZip() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testCorruptZip() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "corrupt.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Corrupt")); // assertFalse(job.waitForCompletion(true)); }