List of usage examples for org.apache.hadoop.mapreduce Job setOutputValueClass
public void setOutputValueClass(Class<?> theClass) throws IllegalStateException
From source file:com.cloudera.sqoop.mapreduce.MySQLDumpImportJob.java
License:Apache License
/**
 * Installs the mapper implementation on the job and declares the output
 * key/value types that go with it.
 *
 * @param job            the job being configured
 * @param tableName      not read by this implementation
 * @param tableClassName not read by this implementation
 * @throws ClassNotFoundException if propagated from the calls made here
 * @throws IOException            if propagated from the calls made here
 */
protected void configureMapper(Job job, String tableName, String tableClassName)
        throws ClassNotFoundException, IOException {
    // The mapper comes from getMapperClass() so the choice stays in one place.
    job.setMapperClass(getMapperClass());

    // Output records: a String key paired with an empty (NullWritable) value.
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(NullWritable.class);
}
From source file:com.cloudera.test.UseHCat.java
License:Apache License
public int run(String[] args) throws Exception { Configuration conf = getConf(); args = new GenericOptionsParser(conf, args).getRemainingArgs(); // Get the input and output table names as arguments String inputTableName = args[0]; String outputTableName = args[1]; // Assume the default database String dbName = null;//from w ww . j a v a2s. co m Job job = new Job(conf, "UseHCat"); HCatInputFormat.setInput(job, dbName, inputTableName); job.setJarByClass(UseHCat.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); // An HCatalog record as input job.setInputFormatClass(HCatInputFormat.class); // Mapper emits a string as key and an integer as value job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); // Ignore the key for the reducer output; emitting an HCatalog record as value job.setOutputKeyClass(WritableComparable.class); job.setOutputValueClass(DefaultHCatRecord.class); job.setOutputFormatClass(HCatOutputFormat.class); HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null)); HCatSchema s = HCatOutputFormat.getTableSchema(job); System.err.println("INFO: output schema explicitly set for writing:" + s); HCatOutputFormat.setSchema(job, s); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:com.cloudy.mapred.base.JobUtil.java
License:Apache License
/**
 * Builds a map-only Hadoop job (zero reduce tasks). The job name is not set.
 *
 * <p>Because there is no reduce phase, the mapper's key/value classes are
 * registered both as the map output types and as the job output types.
 *
 * @param inputPath    input location, stored under {@code mapred.input.dir}
 * @param outputPath   output location, stored under {@code mapred.output.dir}
 * @param inputFormat  input format class
 * @param mapper       mapper class; also used to locate the job jar, so it
 *                     must not be the base {@code Mapper} class itself
 * @param mapperKey    key class emitted by the mapper
 * @param mapperValue  value class emitted by the mapper
 * @param outputFormat output format class
 * @param conf         base configuration; the job works on a copy of it
 * @return the configured job
 * @throws IOException           if the job cannot be created
 * @throws IllegalStateException if {@code mapper} is the base {@code Mapper} class
 */
public static Job prepareJob(Path inputPath, Path outputPath,
        Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper,
        Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue,
        Class<? extends OutputFormat> outputFormat,
        Configuration conf) throws IOException {
    Job mapOnlyJob = new Job(new Configuration(conf));
    Configuration settings = mapOnlyJob.getConfiguration();

    // The base Mapper class gives no hint about which user jar to ship.
    boolean stockMapper = mapper.equals(Mapper.class);
    if (stockMapper) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    mapOnlyJob.setJarByClass(mapper);

    // Input side.
    mapOnlyJob.setInputFormatClass(inputFormat);
    settings.set("mapred.input.dir", inputPath.toString());

    // Map side; the same types double as the final output types.
    mapOnlyJob.setMapperClass(mapper);
    mapOnlyJob.setMapOutputKeyClass(mapperKey);
    mapOnlyJob.setMapOutputValueClass(mapperValue);
    mapOnlyJob.setOutputKeyClass(mapperKey);
    mapOnlyJob.setOutputValueClass(mapperValue);
    settings.setBoolean("mapred.compress.map.output", true);
    mapOnlyJob.setNumReduceTasks(0);

    // Output side.
    mapOnlyJob.setOutputFormatClass(outputFormat);
    settings.set("mapred.output.dir", outputPath.toString());

    return mapOnlyJob;
}
From source file:com.cloudy.mapred.base.JobUtil.java
License:Apache License
/** * Create a map and reduce Hadoop job. Does not set the name on the job. * @param inputPath The input {@link org.apache.hadoop.fs.Path} * @param outputPath The output {@link org.apache.hadoop.fs.Path} * @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat} * @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class. If the Mapper is a no-op, * this value may be null * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class. If the Mapper is a no-op, * this value may be null * @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use * @param reducerKey The reducer key class. * @param reducerValue The reducer value class. * @param outputFormat The {@link org.apache.hadoop.mapreduce.OutputFormat}. * @param conf The {@link org.apache.hadoop.conf.Configuration} to use. * @return The {@link org.apache.hadoop.mapreduce.Job}. * @throws IOException if there is a problem with the IO. * * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class) * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class, * org.apache.hadoop.conf.Configuration) *///from ww w. j ava2 s. c om public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue, Class<? 
extends OutputFormat> outputFormat, Configuration conf) throws IOException { Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); if (reducer.equals(Reducer.class)) { if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); } job.setJarByClass(mapper); } else { job.setJarByClass(reducer); } job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); if (mapperKey != null) { job.setMapOutputKeyClass(mapperKey); } if (mapperValue != null) { job.setMapOutputValueClass(mapperValue); } // jobConf.setBoolean("mapred.compress.map.output", true); job.setReducerClass(reducer); job.setOutputKeyClass(reducerKey); job.setOutputValueClass(reducerValue); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }
From source file:com.conversantmedia.mapreduce.example.WordCount.java
License:Apache License
public static void main(String[] args) { try {//w ww . j ava 2s. c om Job job = Job.getInstance(new Configuration(), "WordCount v2"); job.setInputFormatClass(FileInputFormat.class); job.setOutputFormatClass(FileOutputFormat.class); job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); job.setCombinerClass(WordCountReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); } catch (IOException | ClassNotFoundException | InterruptedException e) { e.printStackTrace(); } }
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.MapperInfoHandler.java
License:Apache License
@SuppressWarnings("rawtypes") public void configureOutputKeyValue(Job job, Class<? extends Mapper> mapperClass, MapperInfo map, boolean isMapOnly) { MaraAnnotationUtil util = MaraAnnotationUtil.INSTANCE; // Try and work it out from the generics Type[] params = util.getGenericTypeParams(mapperClass, Mapper.class); int length = 4; int keyIdx = 2; int valueIdx = 3; // Special case for TableMapper - assume if there are only two, they are the output key/value // TODO resolve this hack - force explicit? if (params != null && params.length == 2) { length = 2;/*from ww w . ja v a2 s . c o m*/ keyIdx = 0; valueIdx = 1; } if (map != null && map.output().key() != void.class) { job.setMapOutputKeyClass(map.output().key()); if (isMapOnly) { job.setOutputKeyClass(map.output().key()); } } else if (params != null && params.length == length) { job.setMapOutputKeyClass((Class<?>) params[keyIdx]); if (isMapOnly) { job.setOutputKeyClass((Class<?>) params[keyIdx]); } } if (map != null && map.output().value() != void.class) { job.setMapOutputValueClass(map.output().value()); if (isMapOnly) { job.setOutputValueClass(map.output().value()); } } else if (params != null && params.length == length) { job.setMapOutputValueClass((Class<?>) params[valueIdx]); if (isMapOnly) { job.setOutputValueClass((Class<?>) params[valueIdx]); } } }
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.ReducerInfoHandler.java
License:Apache License
@SuppressWarnings("rawtypes") protected void configureOutputKeyValue(Job job, Class<? extends Reducer> reducerClass, ReducerInfo reducer) { MaraAnnotationUtil util = MaraAnnotationUtil.INSTANCE; // Try and work it out from the generics Type[] params = util.getGenericTypeParams(reducerClass, Reducer.class); if (reducer != null && reducer.output().key() != void.class) { job.setOutputKeyClass(reducer.output().key()); } else if (params != null && params.length == 4) { job.setOutputKeyClass((Class<?>) params[2]); }/*from www . ja v a2 s. c o m*/ if (reducer != null && reducer.output().value() != void.class) { job.setOutputValueClass(reducer.output().value()); } else if (params != null && params.length == 4) { job.setOutputValueClass((Class<?>) params[3]); } }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test operates on a single file/*from w ww .ja va 2 s .c om*/ * * Expected result: success * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testSingle() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testSingle() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "zip-01.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Single")); // assertTrue(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test operates on a Path containing files that will cause the Job to fail * // ww w . j av a 2 s . c om * Expected result: failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testMultiple() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testMultiple() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, inputPath); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Multiple")); // assertFalse(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test is identical to testMultiple() however the ZipFileInputFormat is set to * be lenient, errors that cause testMultiple() to fail will be quietly ignored here. * //from www . ja va 2 s .c o m * Expected result: success * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testMultipleLenient() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testMultipleLenient() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setLenient(true); ZipFileInputFormat.setInputPaths(job, inputPath); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_MultipleLenient")); // assertTrue(job.waitForCompletion(true)); }