List of usage examples for org.apache.hadoop.mapred JobConf setOutputKeyClass
public void setOutputKeyClass(Class<?> theClass)
From source file:com.ebay.nest.FormattedSequenceFile.java
License:Apache License
/**
 * Configures the sink side of a job: output goes through
 * {@link SequenceFileOutputFormat} with {@link Text} keys and {@link Text} values.
 *
 * @param flowProcess the current flow process (not used by this method)
 * @param tap the tap being set up as a sink (not used by this method)
 * @param conf the job configuration that receives the output settings
 */
@Override
public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap,
        JobConf conf) {
    // The three settings are independent configuration entries.
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
}
From source file:com.example.hadoop.mapreduce.test.MapReduceTest.java
License:Open Source License
public static void main(String[] args) throws IOException { String input = HDFS_PATH + "/input/README.txt"; String input2 = HDFS_PATH + "/input/README2.txt"; String output = HDFS_PATH + "/test/output"; // ?mapreduce??? if (HdfsClient.exists(output)) { HdfsClient.rm(output);/*from w w w .ja v a 2 s .c o m*/ } JobConf conf = new JobConf(MapReduceTest.class); conf.setJobName("MapReduceTest"); conf.addResource("classpath:/hadoop/core-site.xml"); conf.addResource("classpath:/hadoop/hdfs-site.xml"); conf.addResource("classpath:/hadoop/mapred-site.xml"); // mapper conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(IntWritable.class); // reducer conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); // mapper conf.setMapperClass(MapperTest.class); // combiner?????mapper??reducer? conf.setCombinerClass(ReducerTest.class); // reducer conf.setReducerClass(ReducerTest.class); // MapReduce? conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); // MapReduce? FileInputFormat.setInputPaths(conf, new Path[] { new Path(input), new Path(input2) }); // MapReduce? FileOutputFormat.setOutputPath(conf, new Path(output)); try { JobClient.runJob(conf); } catch (IOException e) { e.printStackTrace(); } }
From source file:com.facebook.LinkBench.LinkBenchDriverMR.java
License:Apache License
/** * create JobConf for map reduce job//from www.j a v a 2 s . co m * @param currentphase LOAD or REQUEST * @param nmappers number of mappers (loader or requester) */ private JobConf createJobConf(int currentphase, int nmappers) { final JobConf jobconf = new JobConf(getConf(), getClass()); jobconf.setJobName("LinkBench MapReduce Driver"); if (USE_INPUT_FILES) { jobconf.setInputFormat(SequenceFileInputFormat.class); } else { jobconf.setInputFormat(LinkBenchInputFormat.class); } jobconf.setOutputKeyClass(IntWritable.class); jobconf.setOutputValueClass(LongWritable.class); jobconf.setOutputFormat(SequenceFileOutputFormat.class); if (currentphase == LOAD) { jobconf.setMapperClass(LoadMapper.class); } else { //REQUEST jobconf.setMapperClass(RequestMapper.class); } jobconf.setNumMapTasks(nmappers); jobconf.setReducerClass(LoadRequestReducer.class); jobconf.setNumReduceTasks(1); // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. jobconf.setSpeculativeExecution(false); return jobconf; }
From source file:com.flaptor.hounder.crawler.Nutch9Fetcher.java
License:Apache License
/** * Create a nutch fetchlist segment from the provided list of pages. * @param fetchlist the list of pages from which to build the segment. *///from w w w .j a v a 2 s.c o m private String buildSegment(FetchList fetchlist) throws IOException { // create the segment dir String segmentDir = getNewSegmentDir(); Path output = new Path(segmentDir, CrawlDatum.GENERATE_DIR_NAME); JobConf job = new JobConf(); job.setOutputPath(output); job.setOutputKeyClass(Text.class); job.setOutputValueClass(CrawlDatum.class); // job.setOutputFormat(SequenceFileOutputFormat.class); // job.setOutputKeyComparatorClass(HashComparator.class); RecordWriter writer = new SequenceFileOutputFormat().getRecordWriter(null, job, "fetcher", new NoProgress()); for (com.flaptor.hounder.crawler.pagedb.Page page : fetchlist) { Text key = new Text(page.getUrl()); CrawlDatum value = new CrawlDatum(); // TODO: try taking this line outside of the loop writer.write(key, value); } writer.close(null); return segmentDir; }
From source file:com.foobar.store.TextFileOutputConfigurator.java
License:Apache License
/**
 * Applies the superclass's sequential settings, then declares {@link Text}
 * as both the output key class and the output value class.
 *
 * @param conf the job configuration to update
 * @throws Exception if the superclass configuration step fails
 */
@Override
public void setSequential(JobConf conf) throws Exception {
    super.setSequential(conf);

    // Key and value classes are independent settings.
    conf.setOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
}
From source file:com.foobar.store.TextFileOutputConfigurator.java
License:Apache License
/**
 * Applies the superclass's parallel settings, then declares {@link Text}
 * as both the output key class and the output value class.
 *
 * @param conf the job configuration to update
 * @throws Exception if the superclass configuration step fails
 */
@Override
public void setParallel(JobConf conf) throws Exception {
    super.setParallel(conf);

    // Key and value classes are independent settings.
    conf.setOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
}
From source file:com.github.gaoyangthu.demo.mapred.dancing.DistributedPentomino.java
License:Apache License
public int run(String[] args) throws Exception { JobConf conf; int depth = 5; int width = 9; int height = 10; Class<? extends Pentomino> pentClass; if (args.length == 0) { System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]"); ToolRunner.printGenericCommandUsage(System.out); return -1; }//from w w w . j av a 2 s . c om conf = new JobConf(getConf()); // Pick up the parameters, should the user set these width = conf.getInt("pent.width", width); height = conf.getInt("pent.height", height); depth = conf.getInt("pent.depth", depth); pentClass = conf.getClass("pent.class", OneSidedPentomino.class, Pentomino.class); for (int i = 0; i < args.length; i++) { if (args[i].equalsIgnoreCase("-depth")) { depth = Integer.parseInt(args[++i].trim()); } else if (args[i].equalsIgnoreCase("-height")) { height = Integer.parseInt(args[++i].trim()); } else if (args[i].equalsIgnoreCase("-width")) { width = Integer.parseInt(args[++i].trim()); } } // Set parameters for MR tasks to pick up either which way the user sets // them or not conf.setInt("pent.width", width); conf.setInt("pent.height", height); conf.setInt("pent.depth", depth); Path output = new Path(args[0]); Path input = new Path(output + "_input"); FileSystem fileSys = FileSystem.get(conf); try { FileInputFormat.setInputPaths(conf, input); FileOutputFormat.setOutputPath(conf, output); conf.setJarByClass(PentMap.class); conf.setJobName("dancingElephant"); Pentomino pent = ReflectionUtils.newInstance(pentClass, conf); pent.initialize(width, height); createInputDirectory(fileSys, input, pent, depth); // the keys are the prefix strings conf.setOutputKeyClass(Text.class); // the values are puzzle solutions conf.setOutputValueClass(Text.class); conf.setMapperClass(PentMap.class); conf.setReducerClass(IdentityReducer.class); conf.setNumMapTasks(2000); conf.setNumReduceTasks(1); JobClient.runJob(conf); } finally { fileSys.delete(input, true); } return 0; }
From source file:com.github.gaoyangthu.demo.mapred.DBCountPageView.java
License:Apache License
@Override //Usage DBCountPageView [driverClass dburl] public int run(String[] args) throws Exception { String driverClassName = DRIVER_CLASS; String url = DB_URL;/*from w w w . ja v a2 s . c om*/ if (args.length > 1) { driverClassName = args[0]; url = args[1]; } initialize(driverClassName, url); JobConf job = new JobConf(getConf(), DBCountPageView.class); job.setJobName("Count Pageviews of URLs"); job.setMapperClass(PageviewMapper.class); job.setCombinerClass(LongSumReducer.class); job.setReducerClass(PageviewReducer.class); DBConfiguration.configureDB(job, driverClassName, url); DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames); DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); job.setOutputKeyClass(PageviewRecord.class); job.setOutputValueClass(NullWritable.class); try { JobClient.runJob(job); boolean correct = verify(); if (!correct) { throw new RuntimeException("Evaluation was not correct!"); } } finally { shutdown(); } return 0; }
From source file:com.github.gaoyangthu.demo.mapred.Grep.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 3) { System.out.println("Grep <inDir> <outDir> <regex> [<group>]"); ToolRunner.printGenericCommandUsage(System.out); return -1; }//from w w w . j a va 2s. c om Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE))); JobConf grepJob = new JobConf(getConf(), Grep.class); try { grepJob.setJobName("grep-search"); FileInputFormat.setInputPaths(grepJob, args[0]); grepJob.setMapperClass(RegexMapper.class); grepJob.set("mapred.mapper.regex", args[2]); if (args.length == 4) grepJob.set("mapred.mapper.regex.group", args[3]); grepJob.setCombinerClass(LongSumReducer.class); grepJob.setReducerClass(LongSumReducer.class); FileOutputFormat.setOutputPath(grepJob, tempDir); grepJob.setOutputFormat(SequenceFileOutputFormat.class); grepJob.setOutputKeyClass(Text.class); grepJob.setOutputValueClass(LongWritable.class); JobClient.runJob(grepJob); JobConf sortJob = new JobConf(Grep.class); sortJob.setJobName("grep-sort"); FileInputFormat.setInputPaths(sortJob, tempDir); sortJob.setInputFormat(SequenceFileInputFormat.class); sortJob.setMapperClass(InverseMapper.class); sortJob.setNumReduceTasks(1); // write a single file FileOutputFormat.setOutputPath(sortJob, new Path(args[1])); sortJob.setOutputKeyComparatorClass // sort by decreasing freq (LongWritable.DecreasingComparator.class); JobClient.runJob(sortJob); } finally { FileSystem.get(grepJob).delete(tempDir, true); } return 0; }
From source file:com.github.gaoyangthu.demo.mapred.PiEstimator.java
License:Apache License
/** * Run a map/reduce job for estimating Pi. * * @return the estimated value of Pi// w w w . ja v a 2 s.co m */ public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException { //setup job conf jobConf.setJobName(PiEstimator.class.getSimpleName()); jobConf.setInputFormat(SequenceFileInputFormat.class); jobConf.setOutputKeyClass(BooleanWritable.class); jobConf.setOutputValueClass(LongWritable.class); jobConf.setOutputFormat(SequenceFileOutputFormat.class); jobConf.setMapperClass(PiMapper.class); jobConf.setNumMapTasks(numMaps); jobConf.setReducerClass(PiReducer.class); jobConf.setNumReduceTasks(1); // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. jobConf.setSpeculativeExecution(false); //setup input/output directories final Path inDir = new Path(TMP_DIR, "in"); final Path outDir = new Path(TMP_DIR, "out"); FileInputFormat.setInputPaths(jobConf, inDir); FileOutputFormat.setOutputPath(jobConf, outDir); final FileSystem fs = FileSystem.get(jobConf); if (fs.exists(TMP_DIR)) { throw new IOException( "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. 
Please remove it first."); } if (!fs.mkdirs(inDir)) { throw new IOException("Cannot create input directory " + inDir); } try { //generate an input file for each map task for (int i = 0; i < numMaps; ++i) { final Path file = new Path(inDir, "part" + i); final LongWritable offset = new LongWritable(i * numPoints); final LongWritable size = new LongWritable(numPoints); final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class, LongWritable.class, CompressionType.NONE); try { writer.append(offset, size); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i); } //start a map/reduce job System.out.println("Starting Job"); final long startTime = System.currentTimeMillis(); JobClient.runJob(jobConf); final double duration = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Job Finished in " + duration + " seconds"); //read outputs Path inFile = new Path(outDir, "reduce-out"); LongWritable numInside = new LongWritable(); LongWritable numOutside = new LongWritable(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf); try { reader.next(numInside, numOutside); } finally { reader.close(); } //compute estimated value return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())) .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints)); } finally { fs.delete(TMP_DIR, true); } }