List of usage examples for the org.apache.hadoop.mapreduce.Job constructor Job
Job(JobConf conf) throws IOException
From source file:com.cloudera.traffic.AveragerRunner.java
License:Apache License
/**
 * Command-line entry point that configures and runs the job built from
 * {@code AveragerMapper} and {@code AveragerReducer} (the reducer is also
 * registered as the combiner).
 *
 * <p>Generic Hadoop options are consumed by {@link GenericOptionsParser}; the first
 * two remaining arguments are the input path and the output path, in that order.
 * The JVM exits with status 0 when the job succeeds, 1 when it fails, and 2 when
 * the required paths are missing.
 *
 * @param args generic Hadoop options followed by {@code <input path> <output path>}
 * @throws IOException if the job cannot be set up or submitted
 * @throws ClassNotFoundException if a class needed by the job cannot be found
 * @throws InterruptedException if the thread is interrupted while waiting for the job
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
    if (otherArgs.length < 2) {
        System.err.println("Usage: AveragerRunner [generic options] <input path> <output path>");
        System.exit(2);
    }

    Job job = new Job(conf);
    job.setJarByClass(AveragerRunner.class);

    job.setMapperClass(AveragerMapper.class);
    job.setReducerClass(AveragerReducer.class);
    job.setCombinerClass(AveragerReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(AverageWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Propagate the job outcome to the shell so scripts can detect a failed run.
    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
From source file:com.cloudy.mapred.base.JobUtil.java
License:Apache License
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException { Job job = new Job(new Configuration(conf)); Configuration jobConf = job.getConfiguration(); if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); }//from ww w .j a v a 2s. c om job.setJarByClass(mapper); job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); job.setMapOutputKeyClass(mapperKey); job.setMapOutputValueClass(mapperValue); job.setOutputKeyClass(mapperKey); job.setOutputValueClass(mapperValue); jobConf.setBoolean("mapred.compress.map.output", true); job.setNumReduceTasks(0); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }
From source file:com.cloudy.mapred.base.JobUtil.java
License:Apache License
/** * Create a map and reduce Hadoop job. Does not set the name on the job. * @param inputPath The input {@link org.apache.hadoop.fs.Path} * @param outputPath The output {@link org.apache.hadoop.fs.Path} * @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat} * @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class. If the Mapper is a no-op, * this value may be null * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class. If the Mapper is a no-op, * this value may be null * @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use * @param reducerKey The reducer key class. * @param reducerValue The reducer value class. * @param outputFormat The {@link org.apache.hadoop.mapreduce.OutputFormat}. * @param conf The {@link org.apache.hadoop.conf.Configuration} to use. * @return The {@link org.apache.hadoop.mapreduce.Job}. * @throws IOException if there is a problem with the IO. * * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class) * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class, * org.apache.hadoop.conf.Configuration) *//* w ww .jav a 2s. c o m*/ public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue, Class<? 
extends OutputFormat> outputFormat, Configuration conf) throws IOException { Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); if (reducer.equals(Reducer.class)) { if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); } job.setJarByClass(mapper); } else { job.setJarByClass(reducer); } job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); if (mapperKey != null) { job.setMapOutputKeyClass(mapperKey); } if (mapperValue != null) { job.setMapOutputValueClass(mapperValue); } // jobConf.setBoolean("mapred.compress.map.output", true); job.setReducerClass(reducer); job.setOutputKeyClass(reducerKey); job.setOutputValueClass(reducerValue); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test operates on a single file/*from ww w. j a v a 2s . c om*/ * * Expected result: success * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testSingle() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testSingle() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "zip-01.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Single")); // assertTrue(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test operates on a Path containing files that will cause the Job to fail * //from w w w. j a v a 2 s . c om * Expected result: failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testMultiple() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testMultiple() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, inputPath); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Multiple")); // assertFalse(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test is identical to testMultiple() however the ZipFileInputFormat is set to * be lenient, errors that cause testMultiple() to fail will be quietly ignored here. * //from www .j a v a 2 s . com * Expected result: success * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testMultipleLenient() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testMultipleLenient() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setLenient(true); ZipFileInputFormat.setInputPaths(job, inputPath); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_MultipleLenient")); // assertTrue(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * ZipInputStream doesn't support encrypted entries thus this will fail. * //from w w w .java2 s . c o m * Expected result: failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testEncryptedZip() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testEncryptedZip() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "encrypted.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Encrypted")); // assertFalse(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test explicitly tries to read a file containing random noise as a ZIP file, * the expected result is a quiet failure. The Job shouldn't fail if non-ZIP data is * encountered./*from ww w . ja v a 2s .co m*/ * * Expected result: (quiet) failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testNonZipData() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testNonZipData() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "random.dat")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_NonZipData")); // assertTrue(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test refers to a corrupt (truncated) ZIP file, upon reaching the corruption * the Job will fail and no output will be written through the Reducer. * /* www. j a v a 2 s .c o m*/ * Expected result: failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testCorruptZip() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testCorruptZip() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "corrupt.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Corrupt")); // assertFalse(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test refers to a corrupt (truncated) ZIP file, upon reaching the corruption * the Mapper will ignore the corrupt entry and close the ZIP file. All previous * output will be treated as normal and passed through the Reducer. * // w ww.j av a 2 s . c o m * Expected result: success * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testCorruptZipLenient() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testCorruptZipLenient() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setLenient(true); ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "corrupt.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_CorruptLenient")); // assertTrue(job.waitForCompletion(true)); }