List of usage examples for org.apache.hadoop.mapred JobConf JobConf
public JobConf(Configuration conf, Class exampleClass)
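Before the per-project examples below, here is a minimal, self-contained sketch of the pattern they all share. The class name, job name, and identity map/reduce choice are placeholders (not taken from any of the sources that follow): the two-argument constructor copies an existing Configuration and uses the supplied class to locate the JAR that Hadoop ships to the cluster.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class PassThroughJob {
    public static void main(String[] args) throws Exception {
        // The two-argument constructor copies the supplied Configuration and
        // records PassThroughJob.class so Hadoop can locate the JAR that
        // contains the job's classes and ship it to the cluster.
        JobConf conf = new JobConf(new Configuration(), PassThroughJob.class);
        conf.setJobName("pass-through");

        // Identity map and reduce: the text input is copied to the output
        // directory unchanged, using the default TextInputFormat,
        // TextOutputFormat, and output key/value classes.
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

The examples below follow the same shape, swapping in project-specific mappers, reducers, and input/output formats.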
From source file:com.bigdata.diane.MiniTestDFSIO.java
License:Apache License
@SuppressWarnings("deprecation")
private static void runIOTest(Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
        Path outputDir, Configuration fsConfig) throws IOException {
    JobConf job = new JobConf(fsConfig, MiniTestDFSIO.class);

    FileInputFormat.setInputPaths(job, CONTROL_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(mapperClass);
    job.setReducerClass(AccumulatingReducer.class);

    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);

    JobClient.runJob(job);
}
From source file:com.chriscx.mapred.Driver.java
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), Driver.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
            conf.setBoolean("wordcount.skip.patterns", true);
        } else {
            other_args.add(args[i]);
        }
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}
From source file:com.chriscx.matching.Driver.java
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), com.chriscx.mapred.Driver.class);
    conf.setJobName("Matching");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
            conf.setBoolean("wordcount.skip.patterns", true);
        } else {
            other_args.add(args[i]);
        }
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}
From source file:com.cloudera.avro.MapredColorCount.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MapredColorCount <input path> <output path>");
        return -1;
    }

    JobConf conf = new JobConf(getConf(), MapredColorCount.class);
    conf.setJobName("colorcount");

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    AvroJob.setMapperClass(conf, ColorCountMapper.class);
    AvroJob.setReducerClass(conf, ColorCountReducer.class);

    // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set
    // relevant config options such as input/output format, map output
    // classes, and output key class.
    AvroJob.setInputSchema(conf, User.getClassSchema());
    AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT)));

    JobClient.runJob(conf);
    return 0;
}
From source file:com.cloudera.recordservice.examples.mapreduce.MapredColorCount.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    org.apache.log4j.BasicConfigurator.configure();

    if (args.length != 2) {
        System.err.println("Usage: MapredColorCount <input path> <output path>");
        return -1;
    }

    JobConf conf = new JobConf(getConf(), MapredColorCount.class);
    conf.setJobName("colorcount With Generic Records");

    // RECORDSERVICE:
    // By using the recordservice AvroJob utility, we can configure at run time
    // whether or not to use the RecordService.
    // In this example, we set the conf to true to enable the RecordService.
    conf.setBoolean(com.cloudera.recordservice.avro.AvroJob.USE_RECORD_SERVICE_INPUT_FORMAT_CONF_KEY, true);
    com.cloudera.recordservice.avro.AvroJob.setInputFormat(conf, org.apache.avro.mapred.AvroInputFormat.class);

    // RECORDSERVICE:
    // To read from a table instead of a path, comment out setInputPaths and instead use:
    RecordServiceConfig.setInputTable(conf, "rs", "users");
    //FileInputFormat.setInputPaths(conf, new Path(args[0]));

    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    AvroJob.setMapperClass(conf, ColorCountMapper.class);
    AvroJob.setReducerClass(conf, ColorCountReducer.class);

    // Note that AvroJob.setOutputSchema sets relevant config options such as
    // output format, map output classes, and output key class.
    // setInputSchema is not needed when using Generic Records.
    AvroJob.setOutputSchema(conf,
            Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT)));

    JobClient.runJob(conf);
    return 0;
}
From source file:com.csiro.hadoop.UFORecord.java
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("UFO count");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: avro UFO counter <in> <out>");
        System.exit(2);
    }

    org.apache.hadoop.mapred.FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
    Path outputPath = new Path(otherArgs[1]);
    org.apache.hadoop.mapred.FileOutputFormat.setOutputPath(conf, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath);

    Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
    AvroJob.setInputSchema(conf, input_schema);
    AvroJob.setMapOutputSchema(conf,
            Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG)));
    AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);

    AvroJob.setMapperClass(conf, AvroRecordMapper.class);
    AvroJob.setReducerClass(conf, AvroRecordReducer.class);
    conf.setInputFormat(AvroInputFormat.class);

    JobClient.runJob(conf);
    return 0;
}
From source file:com.datatorrent.demos.mroperator.LogCountsPerHour.java
License:Open Source License
public int run(String[] args) throws Exception {
    // Create a configuration
    Configuration conf = getConf();

    // Create a job from the default configuration that will use the LogCountsPerHour class
    JobConf job = new JobConf(conf, LogCountsPerHour.class);

    // Define our input path as the first command line argument and our output path as the second
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    // Create File Input/Output formats for these paths (in the job)
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // Configure the job: name, mapper, reducer, and combiner
    job.setJobName("LogAveragePerHour");
    job.setMapperClass(LogMapClass.class);
    job.setReducerClass(LogReduce.class);
    job.setCombinerClass(LogReduce.class);

    // Configure the output
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(DateWritable.class);
    job.setOutputValueClass(IntWritable.class);

    // Run the job
    JobClient.runJob(job);
    return 0;
}
From source file:com.ebay.erl.mobius.core.MobiusJob.java
License:Apache License
/**
 * Add a job, represented by the <code>aNewJob</code> object, into the execution queue.
 * <p>
 *
 * Users can use this method to add one or more jobs' configuration into the job queue, and the Mobius engine
 * will analyze the <code>aNewJob</code> objects within the queue to understand the dependencies between jobs.
 * For example, if job B's input is from job A, then job B won't be submitted until A completes
 * successfully. If A fails, B will not be submitted.
 * <p>
 *
 * @param aNewJobConf a {@link Configuration} object that represents a Hadoop job.
 * @throws IOException
 */
protected void addToExecQueue(Configuration aNewJobConf) throws IOException {
    // Add the new job into the execution engine and realize
    // its dependencies, if any.
    //
    // To realize the job dependencies, we need to analyze the input
    // path of this new job.
    //
    // The inputs of a job could be:
    // 1) if aNewJob is not a derived job (ex: result of another MR job),
    //    then the inputs of the job can be retrieved from "mapred.input.dir",
    //    or from {@link MultipleInputs} (ex: joining different types of dataset).
    // 2) if aNewJob is a derived job, the input is the output of the previous
    //    MR job.
    String inputFolders = aNewJobConf.get("mapred.input.dir", "");

    if (inputFolders.length() == 0) {
        // the value of "mapred.input.dir" is empty, assuming the inputs of this job
        // are coming from {@link MultipleInputs}.
        String multipleInputs = aNewJobConf.get(
                "mapred.input.dir.mappers" /* for the old MultipleInputs, v0.20.X */,
                aNewJobConf.get("mapreduce.input.multipleinputs.dir.formats" /* for the new MultipleInputs, v0.23.X */, ""));

        if (multipleInputs.length() > 0) {
            // the input paths of this job come from MultipleInputs; extract the input paths.
            // The format from {@link MultipleInputs} is:
            // hadoop_path1;corresponding_mapper1,hadoop_path2;corresponding_mapper2,...
            String[] pathAndMapperPairs = multipleInputs.split(",");
            for (String aPair : pathAndMapperPairs) {
                String[] pathToMapper = aPair.split(";");
                String path = pathToMapper[0];
                String mapper = pathToMapper[1];

                if (inputFolders.length() == 0) {
                    inputFolders = getPathOnly(path);
                } else {
                    inputFolders = inputFolders + "," + getPathOnly(path);
                }
            }
        } else {
            throw new IllegalArgumentException("Cannot find input path(s) of job: ["
                    + aNewJobConf.get("mapred.job.name") + "] from the following attributes: "
                    + "mapred.input.dir, mapred.input.dir.mappers, nor mapreduce.input.multipleinputs.dir.formats. "
                    + "Please specify the input path(s) of this job.");
        }
    } else {
        // the input path of this job is specified in mapred.input.dir
        inputFolders = getPathOnly(inputFolders);
    }

    ////////////////////////////////////////////////////////////
    // validate the output path of this job, to ensure it doesn't
    // use the same folder as another job's output.
    ////////////////////////////////////////////////////////////
    String outputPath = aNewJobConf.get("mapred.output.dir", "");
    if (outputPath.isEmpty())
        throw new IllegalStateException(
                "Please specify the output directory of job:" + aNewJobConf.get("mapred.job.name"));

    if (this.isOutputOfAnotherJob(outputPath)) {
        throw new IllegalArgumentException("Job [" + aNewJobConf.get("mapred.job.name") + "]'s output ["
                + outputPath + "] is " + "the output of job[" + jobTopology.get(outputPath).getJobName() + "], "
                + "please make sure to use different output folder for each job.");
    }

    //////////////////////////////////////////////////////////////////
    // passed all the validation, start to build the dependencies.
    //////////////////////////////////////////////////////////////////
    Job newJob = new ConfigurableJob(new JobConf(aNewJobConf, this.getClass()));

    newJob.setJobName(aNewJobConf.get("mapred.job.name", aNewJobConf.get("mapreduce.job.name", "Mobius Job")));
    for (String anInputOfNewJob : inputFolders.split(",")) {
        // Added to track inputs for local PC sampling
        inputPaths.add(anInputOfNewJob);

        Job dependsOn = jobTopology.get(this.getFS().makeQualified(new Path(anInputOfNewJob)).toUri());
        if (dependsOn != null) {
            List<Job> dependingJobs = newJob.getDependingJobs();

            boolean alreadyInDependency = dependingJobs != null && dependingJobs.contains(dependsOn);
            if (alreadyInDependency) {
                // already added, do nothing.
            } else {
                LOGGER.info(newJob.getJobName() + " depends on " + dependsOn.getJobName());
                newJob.addDependingJob(dependsOn);
            }
        }
    }

    // put the output of this <code>newJob</code> into the job topology
    // so that later, if a job reads this <code>newJob</code>'s output
    // as its input, the system can detect the dependency.
    URI outputPathURI = this.getFS().makeQualified(new Path(outputPath)).toUri();
    LOGGER.info("Adding Job:" + newJob.getJobName() + "\tOutput:[" + outputPath.toString() + "]");
    jobTopology.put(outputPathURI, newJob);
}
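The Javadoc above describes how the engine chains jobs whose input is another job's output. A rough usage sketch follows; the job names and paths are hypothetical, and it assumes the code runs inside a MobiusJob subclass, since addToExecQueue is protected.

// Hypothetical driver code inside a MobiusJob subclass; names and paths are
// placeholders, not taken from the Mobius sources.
Configuration jobA = new Configuration();
jobA.set("mapred.job.name", "extract");
jobA.set("mapred.input.dir", "/data/raw");
jobA.set("mapred.output.dir", "/data/stage1");
this.addToExecQueue(jobA);

Configuration jobB = new Configuration();
jobB.set("mapred.job.name", "aggregate");
// jobB's input is jobA's output directory, so the engine records a dependency
// and submits jobB only after jobA completes successfully.
jobB.set("mapred.input.dir", "/data/stage1");
jobB.set("mapred.output.dir", "/data/final");
this.addToExecQueue(jobB);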
From source file:com.facebook.LinkBench.LinkBenchDriverMR.java
License:Apache License
/**
 * create JobConf for map reduce job
 * @param currentphase LOAD or REQUEST
 * @param nmappers number of mappers (loader or requester)
 */
private JobConf createJobConf(int currentphase, int nmappers) {
    final JobConf jobconf = new JobConf(getConf(), getClass());
    jobconf.setJobName("LinkBench MapReduce Driver");

    if (USE_INPUT_FILES) {
        jobconf.setInputFormat(SequenceFileInputFormat.class);
    } else {
        jobconf.setInputFormat(LinkBenchInputFormat.class);
    }
    jobconf.setOutputKeyClass(IntWritable.class);
    jobconf.setOutputValueClass(LongWritable.class);
    jobconf.setOutputFormat(SequenceFileOutputFormat.class);

    if (currentphase == LOAD) {
        jobconf.setMapperClass(LoadMapper.class);
    } else { // REQUEST
        jobconf.setMapperClass(RequestMapper.class);
    }
    jobconf.setNumMapTasks(nmappers);

    jobconf.setReducerClass(LoadRequestReducer.class);
    jobconf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobconf.setSpeculativeExecution(false);

    return jobconf;
}
From source file:com.github.gaoyangthu.demo.mapred.DBCountPageView.java
License:Apache License
@Override
//Usage: DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {
    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;

    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }

    initialize(driverClassName, url);

    JobConf job = new JobConf(getConf(), DBCountPageView.class);
    job.setJobName("Count Pageviews of URLs");

    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBConfiguration.configureDB(job, driverClassName, url);

    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);

    try {
        JobClient.runJob(job);
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return 0;
}