List of usage examples for org.apache.hadoop.mapreduce Job setMapperClass
public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException
From source file:com.conversantmedia.mapreduce.example.WordCount.java
License:Apache License
public static void main(String[] args) { try {//from w w w . j av a2 s .c o m Job job = Job.getInstance(new Configuration(), "WordCount v2"); job.setInputFormatClass(FileInputFormat.class); job.setOutputFormatClass(FileOutputFormat.class); job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); job.setCombinerClass(WordCountReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); } catch (IOException | ClassNotFoundException | InterruptedException e) { e.printStackTrace(); } }
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.MapperInfoHandler.java
License:Apache License
@Override @SuppressWarnings("rawtypes") public void process(Annotation annotation, Job job, Object target) { MapperInfo map = (MapperInfo) annotation; if (map.value() != org.apache.hadoop.mapreduce.Mapper.class) { Class<? extends Mapper> mapperClass = map.value(); job.setMapperClass(mapperClass); // Is this a map-only job? Field jobField = (Field) target; boolean isMapOnly = isMapOnlyJob(job, jobField); configureOutputKeyValue(job, mapperClass, map, isMapOnly); }// w w w . j ava 2 s. c o m }
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.MultiInputAnnotationHandler.java
License:Apache License
@Override public void process(Annotation annotation, Job job, Object target) throws ToolException { for (Input input : ((MultiInput) annotation).value()) { Path path = getInputAsPath(input.path()); if (input.mapper() == Mapper.class) { MultipleInputs.addInputPath(job, path, input.format()); } else {//from w w w .j a va2s . co m MultipleInputs.addInputPath(job, path, input.format(), input.mapper()); // Need to call again here so the call is captured by our aspect which // will replace it with the annotated delegating mapper class for resource // injection if required. job.setMapperClass(DelegatingMapper.class); } } }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test operates on a single file//from w w w . ja v a 2s.c om * * Expected result: success * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testSingle() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testSingle() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "zip-01.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Single")); // assertTrue(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test operates on a Path containing files that will cause the Job to fail * /*from w ww . java2 s. co m*/ * Expected result: failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testMultiple() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testMultiple() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, inputPath); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Multiple")); // assertFalse(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test is identical to testMultiple() however the ZipFileInputFormat is set to * be lenient, errors that cause testMultiple() to fail will be quietly ignored here. * // w w w . j a v a 2 s . c o m * Expected result: success * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testMultipleLenient() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testMultipleLenient() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setLenient(true); ZipFileInputFormat.setInputPaths(job, inputPath); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_MultipleLenient")); // assertTrue(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * ZipInputStream doesn't support encrypted entries thus this will fail. * //from w w w .j av a 2 s .co m * Expected result: failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testEncryptedZip() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testEncryptedZip() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "encrypted.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Encrypted")); // assertFalse(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test explicitly tries to read a file containing random noise as a ZIP file, * the expected result is a quiet failure. The Job shouldn't fail if non-ZIP data is * encountered.// w w w .j a va 2 s .c o m * * Expected result: (quiet) failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testNonZipData() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testNonZipData() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "random.dat")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_NonZipData")); // assertTrue(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test refers to a corrupt (truncated) ZIP file, upon reaching the corruption * the Job will fail and no output will be written through the Reducer. * /*from www. j av a2 s. co m*/ * Expected result: failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testCorruptZip() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testCorruptZip() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "corrupt.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Corrupt")); // assertFalse(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test refers to a corrupt (truncated) ZIP file, upon reaching the corruption * the Mapper will ignore the corrupt entry and close the ZIP file. All previous * output will be treated as normal and passed through the Reducer. * // w w w . java2s . c o m * Expected result: success * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testCorruptZipLenient() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testCorruptZipLenient() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setLenient(true); ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "corrupt.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_CorruptLenient")); // assertTrue(job.waitForCompletion(true)); }