List of usage examples for org.apache.hadoop.mapreduce.Job.setMapperClass
public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException
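setMapperClass assigns the Mapper implementation for the job; it throws IllegalStateException if the job has already been submitted. Before the project examples below, here is a minimal driver sketch; the TokenizerMapper class and the input/output paths are hypothetical placeholders, not taken from any example on this page.

// Minimal driver sketch. Assumption: a user-defined TokenizerMapper exists;
// the /in and /out paths are placeholders.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetMapperExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "set-mapper-example");
        job.setJarByClass(SetMapperExample.class);
        job.setMapperClass(TokenizerMapper.class); // must be set before submission
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path("/in"));
        FileOutputFormat.setOutputPath(job, new Path("/out"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}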
From source file:com.fanlehai.hadoop.join.CompositeJoin.java
License:Apache License
/**
 * The main driver for the join program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
@SuppressWarnings("rawtypes")
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String join_reduces = conf.get(REDUCES_PER_HOST);
    if (join_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(join_reduces);
    }
    Job job = Job.getInstance(conf);
    job.setJobName("join");
    job.setJarByClass(CompositeJoin.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    Class<? extends InputFormat> inputFormatClass = KeyValueTextInputFormat.class; // SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = Text.class; // BytesWritable.class;
    Class<? extends Writable> outputValueClass = Text.class; // TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    job.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }

    String strOut = otherArgs.remove(otherArgs.size() - 1);
    FileSystem.get(new Configuration()).delete(new Path(strOut), true);
    FileOutputFormat.setOutputPath(job, new Path(strOut));

    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    job.setInputFormatClass(CompositeInputFormat.class);
    job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR,
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    job.setOutputFormatClass(outputFormatClass);
    job.setMapperClass(MapComposite.class);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:com.fanlehai.hadoop.serialize.avro.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }
    FileSystem.get(new Configuration()).delete(new Path(args[1]), true);

    Job job = Job.getInstance(super.getConf(), "AvroWordCount");
    job.setJarByClass(MapReduceAvroWordCount.class);
    job.setJobName("AvroWordCount");

    // We call setOutputSchema first so we can override the configuration
    // parameters it sets
    AvroJob.setOutputKeySchema(job,
            Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT)));
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setSortComparatorClass(Text.Comparator.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // exit code 0 on success, 1 on failure
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.fanlehai.hadoop.serialize.avro.MapReduceColorCount.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }
    FileSystem.get(new Configuration()).delete(new Path(args[1]), true);

    Job job = Job.getInstance(super.getConf(), "MapReduceColorCount");
    job.setJarByClass(MapReduceColorCount.class);
    job.setJobName("Color Count");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(ColorCountMapper.class);
    AvroJob.setInputKeySchema(job, User.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    // exit code 0 on success, 1 on failure
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.fanlehai.hadoop.serialize.json.multiline.ExampleJob.java
License:Apache License
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: ExampleJob <in dir> <out dir>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    String input = args[0];
    String output = args[1];

    Configuration conf = super.getConf();

    writeInput(conf, new Path(input));

    Job job = Job.getInstance(getConf(), "ExampleJob");
    job.setJarByClass(ExampleJob.class);
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);

    Path outputPath = new Path(output);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, outputPath);

    // use the JSON input format
    job.setInputFormatClass(MultiLineJsonInputFormat.class);

    // specify the JSON attribute name which is used to determine which
    // JSON elements are supplied to the mapper
    MultiLineJsonInputFormat.setInputJsonMember(job, "colorName");

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:com.flipkart.fdp.migration.distcp.core.MirrorDistCPDriver.java
License:Apache License
private Job createJob(Configuration configuration) throws Exception {
    System.out.println("Initializing BlueShift v 2.0...");
    System.out.println("Configuration: " + dcmConfig.toString());

    Job job = Job.getInstance(configuration, "BlueShift v 2.0 - " + dcmConfig.getBatchName());

    job.setJarByClass(MirrorDistCPDriver.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MirrorMapper.class);
    job.setReducerClass(MirrorReducer.class);

    job.setInputFormatClass(MirrorFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(job, stateManager.getReportPath());

    job.setNumReduceTasks(configuration.getInt("mapreduce.reduce.tasks", 1));

    System.out.println("Job Initialization Complete, The status of the Mirror job will be written to: "
            + stateManager.getReportPath());
    return job;
}
From source file:com.flytxt.yesbank.processor.HdfsToHbaseEngine.java
License:Open Source License
public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.out.println("Hdfs to Hbase Engine requires the model id as input...");
        System.exit(1);
    }

    String modelId = args[0];

    DBConnection dbConnection = DBConnection.getInstance();
    dbConnection.loadDbProperties();
    dbConnection.initializeDataBaseConnection();

    String hfdsInputLoc = dbConnection.getHdfsInputDirectory(modelId);

    if (hfdsInputLoc != null) {
        Configuration conf = new Configuration();
        String params = args[0];
        conf.set("test", params);

        Job job = new Job(conf);
        // Job job = Job.getInstance(conf, "hfds to hbase Engine");
        job.setJarByClass(HdfsToHbaseEngine.class);
        job.setMapperClass(HdfsEngineMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(hfdsInputLoc));
        job.setOutputFormatClass(NullOutputFormat.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } else {
        System.out.println("Hdfs input location does not exist. Unable to process the request.");
        System.exit(0);
    }
}
From source file:com.flytxt.yesbank.test.ModelProcessor.java
License:Open Source License
public static void main(String[] args) throws Exception {
    String modelId = args[0];

    ModelProcessor modelProcessor = new ModelProcessor();
    modelProcessor.loadDbProperties();
    modelProcessor.initializeDataBaseConnection();

    String hfdsInputLoc = modelProcessor.getHdfsInputDirectory(modelId);

    if (hfdsInputLoc != null) {
        Configuration conf = new Configuration();
        String params = args[0];
        conf.set("test", params);

        Job job = new Job(conf);
        // Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(ModelProcessor.class);
        job.setMapperClass(HdfsProcessMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(hfdsInputLoc));
        job.setOutputFormatClass(NullOutputFormat.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } else {
        System.out.println("Hdfs input location does not exist. Unable to process the request.");
        System.exit(0);
    }
}
From source file:com.frdeso.app.Sleepy.java
License:Apache License
/**
 * Configures and runs a job whose mapper sleeps for the configured number
 * of seconds (the "mapSleepTime" setting, taken from the third argument).
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: sleepy <in> <out> <sleep seconds>");
        return 1; // non-zero exit code on bad usage
    }

    Configuration conf = getConf();
    conf.set("mapSleepTime", args[2]);
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "joba");
    job.setJarByClass(Sleepy.class);
    job.setMapperClass(SleepyMapper.class);
    job.setCombinerClass(Reducer.class);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);

    boolean result = job.waitForCompletion(true);
    return (result ? 0 : 1);
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
/**
 * Create a map-only Hadoop Job out of the passed in parameters. Does not set the
 * Job name.
 *
 * @see #getCustomJobName(String, JobContext, Class, Class)
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat,
        Configuration conf) throws IOException {

    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
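For context, a call site for this helper might look like the following sketch; MyMapper and the paths are hypothetical placeholders, not part of the library.

// Hypothetical usage of prepareJob (MyMapper and the paths are placeholders):
Job job = HadoopUtils.prepareJob(
        new Path("/data/in"), new Path("/data/out"),
        TextInputFormat.class,
        MyMapper.class, Text.class, IntWritable.class,
        SequenceFileOutputFormat.class,
        new Configuration());
job.waitForCompletion(true);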
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
/**
 * Create an Avro-aware Hadoop Job out of the passed in parameters. Key and
 * value types may be given either as Avro Schema instances or as Writable
 * Class objects; passing a null reducer produces a map-only job.
 */
public static Job prepareAvroJob(String inputPaths, String outputPath,
        Class<? extends InputFormat> inputFormat, Object inputKey, Object inputValue,
        Class<? extends Mapper> mapper, Object mapperKey, Object mapperValue,
        Class<? extends Reducer> combiner, Class<? extends Reducer> reducer, Object outputKey,
        Object outputValue, Class<? extends OutputFormat> outputFormat, Configuration conf,
        boolean overwrite, boolean isCompress) throws IOException {

    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (inputKey instanceof Schema) {
        if (inputValue instanceof Schema) {
            inputFormat = inputFormat == null ? AvroKeyValueInputFormat.class : inputFormat;
        }
        inputFormat = inputFormat == null ? AvroKeyInputFormat.class : inputFormat;
    }
    if (inputFormat != null) {
        job.setInputFormatClass(inputFormat);
    }
    if (inputKey instanceof Schema) {
        AvroJob.setInputKeySchema(job, (Schema) inputKey);
    }
    if (inputValue instanceof Schema) {
        AvroJob.setInputValueSchema(job, (Schema) inputValue);
    }

    if (outputKey instanceof Schema) {
        if (outputValue instanceof Schema) {
            outputFormat = outputFormat == null ? AvroKeyValueOutputFormat.class : outputFormat;
        }
        outputFormat = outputFormat == null ? AvroKeyOutputFormat.class : outputFormat;
    }
    if (outputFormat != null) {
        job.setOutputFormatClass(outputFormat);
    }
    if (outputKey instanceof Schema) {
        AvroJob.setOutputKeySchema(job, (Schema) outputKey);
    } else if (outputKey instanceof Class) {
        job.setOutputKeyClass((Class) outputKey);
    }
    if (outputValue instanceof Schema) {
        AvroJob.setOutputValueSchema(job, (Schema) outputValue);
    } else if (outputValue instanceof Class) {
        job.setOutputValueClass((Class) outputValue);
    }

    if (reducer == null) {
        // Map-only job: the mapper's output types are the job's output types.
        job.setNumReduceTasks(0);

        if (mapperKey instanceof Schema) {
            AvroJob.setMapOutputKeySchema(job, (Schema) mapperKey);
        } else if (mapperKey instanceof Class) {
            job.setOutputKeyClass((Class) mapperKey);
        }
        if (mapperValue instanceof Schema) {
            AvroJob.setOutputValueSchema(job, (Schema) mapperValue);
        } else if (mapperValue instanceof Class) {
            job.setOutputValueClass((Class) mapperValue);
        }
        job.setJarByClass(mapper);
    } else if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    if (isCompress) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);
    }

    job.setMapperClass(mapper);
    if (mapperKey instanceof Schema) {
        AvroJob.setMapOutputKeySchema(job, (Schema) mapperKey);
    } else if (mapperKey instanceof Class) {
        job.setMapOutputKeyClass((Class) mapperKey);
    }
    if (mapperValue instanceof Schema) {
        AvroJob.setMapOutputValueSchema(job, (Schema) mapperValue);
    } else if (mapperValue instanceof Class) {
        job.setMapOutputValueClass((Class) mapperValue);
    }

    if (reducer != null) {
        job.setReducerClass(reducer);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }

    if (overwrite) {
        HadoopUtils.delete(jobConf, new Path(outputPath));
    }

    return job;
}
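Because many of the parameters are optional, a call site is easier to read with one argument per line. The sketch below shows a hypothetical map-only invocation; MyAvroMapper, the schemas, and the paths are placeholders invented for illustration.

// Hypothetical usage of prepareAvroJob (MyAvroMapper, schemas, and paths
// are placeholders). A null reducer yields a map-only job; a null input
// format is inferred as AvroKeyInputFormat because inputKey is a Schema.
Job job = HadoopUtils.prepareAvroJob(
        "/data/in",                        // inputPaths
        "/data/out",                       // outputPath
        null,                              // inputFormat (inferred)
        Schema.create(Schema.Type.STRING), // inputKey
        null,                              // inputValue
        MyAvroMapper.class,                // mapper
        Schema.create(Schema.Type.STRING), // mapperKey
        Schema.create(Schema.Type.INT),    // mapperValue
        null,                              // combiner
        null,                              // reducer: null => map-only
        null,                              // outputKey
        null,                              // outputValue
        null,                              // outputFormat
        new Configuration(),
        true,                              // overwrite existing output
        false);                            // isCompress
job.waitForCompletion(true);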