List of usage examples for org.apache.hadoop.mapred.JobConf.setJobName
public void setJobName(String name)
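Before the source-file examples below, here is a minimal, self-contained sketch of where setJobName fits in a classic org.apache.hadoop.mapred job driver. The driver class name, job name, and mapper/reducer placeholders are hypothetical, not taken from any of the listed sources; without explicit mapper/reducer classes the job falls back to the identity implementations.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class MyJobDriver {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(MyJobDriver.class);
        // the name set here is what appears in the JobTracker UI and job logs
        conf.setJobName("my-example-job");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // hypothetical placeholders; substitute your own classes
        // conf.setMapperClass(MyMapper.class);
        // conf.setReducerClass(MyReducer.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}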
From source file:com.ebay.erl.mobius.core.MobiusJob.java
License:Apache License
/**
 * Select the <code>columns</code> from the <code>dataset</code>, store
 * it into <code>outputFolder</code> with the given <code>outputFormat</code>.
 * <p>
 *
 * Here is an example:
 * <pre>
 * <code>
 * public class MyJob extends MobiusJob
 * {
 *     public void run(String[] args)
 *     {
 *         Dataset students = ...;
 *
 *         // save the result to $OUTPUT in SequenceFileOutputFormat;
 *         // the key will be NullWritable, and the value is a Tuple
 *         // which contains 3 columns: id, f_name and l_name.
 *         this.list(students,
 *                 new Path("$OUTPUT"),
 *                 SequenceFileOutputFormat.class,
 *                 new Column(students, "id"),
 *                 new Column(students, "f_name"),
 *                 new Column(students, "l_name")
 *         );
 *     }
 *
 *     public static void main(String[] args) throws Exception
 *     {
 *         System.exit(MobiusJobRunner.run(new MyJob(), args));
 *     }
 * }
 * </code>
 * </pre>
 */
public Dataset list(Dataset dataset, Path outputFolder, Class<? extends FileOutputFormat> outputFormat,
        Column... columns) throws IOException {
    byte datasetID = 0; // set to 0 as there is only one dataset to be operated on

    JobConf job = dataset.createJobConf(datasetID);
    job.set("mapred.job.name", "Listing " + dataset.getName());
    job.setJarByClass(this.getClass());
    job.setNumReduceTasks(0); // list is a map-only job
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Tuple.class);
    job.setJobName("List " + dataset.getName());

    JobSetup.validateColumns(dataset, columns);
    JobSetup.setupInputs(job, dataset, datasetID);
    JobSetup.setupProjections(job, dataset, datasetID, columns);
    JobSetup.setupOutputs(job, outputFolder, outputFormat);

    this.addToExecQueue(job);

    AbstractDatasetBuilder builder = DatasetBuildersFactory.getInstance(this)
            .getBuilder(outputFormat, "Dataset_" + outputFolder.getName());
    return builder.buildFromPreviousJob(job, outputFormat, Column.toSchemaArray(columns));
}
From source file:com.example.hadoop.hdfs.test.HdfsClient.java
License:Open Source License
public static JobConf config() {
    JobConf conf = new JobConf(HdfsClient.class);
    conf.setJobName("HdfsClient");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");
    return conf;
}
From source file:com.example.hadoop.mapreduce.test.MapReduceTest.java
License:Open Source License
public static void main(String[] args) throws IOException {
    String input = HDFS_PATH + "/input/README.txt";
    String input2 = HDFS_PATH + "/input/README2.txt";
    String output = HDFS_PATH + "/test/output";

    // remove the output directory if it already exists, otherwise the job fails
    if (HdfsClient.exists(output)) {
        HdfsClient.rm(output);
    }

    JobConf conf = new JobConf(MapReduceTest.class);
    conf.setJobName("MapReduceTest");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");

    // mapper output key/value types
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);

    // reducer output key/value types
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    // mapper class
    conf.setMapperClass(MapperTest.class);
    // combiner: runs locally on mapper output before it is sent to the reducer
    conf.setCombinerClass(ReducerTest.class);
    // reducer class
    conf.setReducerClass(ReducerTest.class);

    // input/output formats
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // input paths
    FileInputFormat.setInputPaths(conf, new Path[] { new Path(input), new Path(input2) });
    // output path
    FileOutputFormat.setOutputPath(conf, new Path(output));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.facebook.LinkBench.LinkBenchDriverMR.java
License:Apache License
/**
 * Create the JobConf for the map/reduce job.
 *
 * @param currentphase LOAD or REQUEST
 * @param nmappers number of mappers (loaders or requesters)
 */
private JobConf createJobConf(int currentphase, int nmappers) {
    final JobConf jobconf = new JobConf(getConf(), getClass());
    jobconf.setJobName("LinkBench MapReduce Driver");

    if (USE_INPUT_FILES) {
        jobconf.setInputFormat(SequenceFileInputFormat.class);
    } else {
        jobconf.setInputFormat(LinkBenchInputFormat.class);
    }
    jobconf.setOutputKeyClass(IntWritable.class);
    jobconf.setOutputValueClass(LongWritable.class);
    jobconf.setOutputFormat(SequenceFileOutputFormat.class);

    if (currentphase == LOAD) {
        jobconf.setMapperClass(LoadMapper.class);
    } else { // REQUEST
        jobconf.setMapperClass(RequestMapper.class);
    }
    jobconf.setNumMapTasks(nmappers);
    jobconf.setReducerClass(LoadRequestReducer.class);
    jobconf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobconf.setSpeculativeExecution(false);
    return jobconf;
}
From source file:com.github.gaoyangthu.demo.mapred.dancing.DistributedPentomino.java
License:Apache License
public int run(String[] args) throws Exception {
    JobConf conf;
    int depth = 5;
    int width = 9;
    int height = 10;
    Class<? extends Pentomino> pentClass;
    if (args.length == 0) {
        System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    conf = new JobConf(getConf());

    // pick up the parameters, in case the user set them in the configuration
    width = conf.getInt("pent.width", width);
    height = conf.getInt("pent.height", height);
    depth = conf.getInt("pent.depth", depth);
    pentClass = conf.getClass("pent.class", OneSidedPentomino.class, Pentomino.class);

    for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-depth")) {
            depth = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-height")) {
            height = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-width")) {
            width = Integer.parseInt(args[++i].trim());
        }
    }

    // set the parameters explicitly so the MR tasks pick them up, whether
    // the user overrode them or the defaults are in effect
    conf.setInt("pent.width", width);
    conf.setInt("pent.height", height);
    conf.setInt("pent.depth", depth);

    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, output);
        conf.setJarByClass(PentMap.class);

        conf.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        createInputDirectory(fileSys, input, pent, depth);

        // the keys are the prefix strings
        conf.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(PentMap.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setNumMapTasks(2000);
        conf.setNumReduceTasks(1);

        JobClient.runJob(conf);
    } finally {
        fileSys.delete(input, true);
    }
    return 0;
}
From source file:com.github.gaoyangthu.demo.mapred.DBCountPageView.java
License:Apache License
@Override // Usage: DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {
    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;

    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }

    initialize(driverClassName, url);

    JobConf job = new JobConf(getConf(), DBCountPageView.class);

    job.setJobName("Count Pageviews of URLs");

    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBConfiguration.configureDB(job, driverClassName, url);

    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);

    try {
        JobClient.runJob(job);
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return 0;
}
From source file:com.github.gaoyangthu.demo.mapred.Grep.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf grepJob = new JobConf(getConf(), Grep.class);

    try {
        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);
        grepJob.set("mapred.mapper.regex", args[2]);
        if (args.length == 4)
            grepJob.set("mapred.mapper.regex.group", args[3]);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormat(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        JobClient.runJob(grepJob);

        JobConf sortJob = new JobConf(Grep.class);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormat(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        // sort by decreasing freq
        sortJob.setOutputKeyComparatorClass(LongWritable.DecreasingComparator.class);

        JobClient.runJob(sortJob);
    } finally {
        FileSystem.get(grepJob).delete(tempDir, true);
    }
    return 0;
}
From source file:com.github.gaoyangthu.demo.mapred.PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // set up job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // set up input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
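For context on the final BigDecimal expression above: each map task samples numPoints points in the unit square and the single reducer totals how many fall inside the inscribed quarter circle, so the value returned is the standard Monte Carlo estimate pi ≈ 4 * numInside / (numMaps * numPoints).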
From source file:com.github.gaoyangthu.demo.mapred.terasort.TeraGen.java
License:Apache License
/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException {
    JobConf job = (JobConf) getConf();
    setNumberOfRows(job, Long.parseLong(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(RangeInputFormat.class);
    job.setOutputFormat(TeraOutputFormat.class);
    JobClient.runJob(job);
    return 0;
}
From source file:com.github.gaoyangthu.demo.mapred.terasort.TeraSort.java
License:Apache License
public int run(String[] args) throws Exception {
    LOG.info("starting");
    JobConf job = (JobConf) getConf();
    Path inputDir = new Path(args[0]);
    inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
    Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
    URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(TeraInputFormat.class);
    job.setOutputFormat(TeraOutputFormat.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TeraInputFormat.writePartitionFile(job, partitionFile);
    DistributedCache.addCacheFile(partitionUri, job);
    DistributedCache.createSymlink(job);
    job.setInt("dfs.replication", 1);
    TeraOutputFormat.setFinalSync(job, true);
    JobClient.runJob(job);
    LOG.info("done");
    return 0;
}