List of usage examples for org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath
@SuppressWarnings("unchecked") public static void addInputPath(Job job, Path path, Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass)
From source file:adts.ContainingArticle.java
License:Open Source License
/**
 * Runs the three chained MapReduce stages of the ContainingArticle pipeline.
 *
 * <ol>
 *   <li>Stage 1 reads the queries ({@code args[0]}) with {@code QueriesMap} and the articles
 *       ({@code args[1]}) with {@code ArticlesMap}, reduces with {@code Reduce} and writes to
 *       {@code /root/temporary}.</li>
 *   <li>Stage 2 reads {@code /root/temporary} with {@code CollectMap}/{@code CollectReduce} and
 *       writes to {@code /root/temporary2}.</li>
 *   <li>Stage 3 reads {@code /root/temporary2} with {@code CountMap}/{@code CountReduce} and
 *       writes the final result to {@code args[2]}.</li>
 * </ol>
 *
 * <p>Each stage consumes the output of the previous one, so the process exits with status 1 as
 * soon as any stage reports failure instead of running later stages on incomplete data.
 *
 * @param args {@code args[0]} queries input path, {@code args[1]} articles input path,
 *             {@code args[2]} final output path
 * @throws Exception if a job cannot be configured, submitted or monitored
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Stage 1: two differently-parsed inputs feeding a single reducer.
    Job job = new Job(conf, "ContainingArticle");
    job.setJarByClass(ContainingArticle.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path queriesInputPath = new Path(args[0]);
    Path articlesInputPath = new Path(args[1]);
    MultipleInputs.addInputPath(job, queriesInputPath, TextInputFormat.class, QueriesMap.class);
    MultipleInputs.addInputPath(job, articlesInputPath, TextInputFormat.class, ArticlesMap.class);
    FileOutputFormat.setOutputPath(job, new Path("/root/temporary"));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    if (!job.waitForCompletion(true)) {
        // Stage 2 reads this stage's output; do not continue on failure.
        System.exit(1);
    }

    // Stage 2: collect over the intermediate output of stage 1.
    Job collectingJob = new Job(conf, "ContainingArticle");
    collectingJob.setJarByClass(ContainingArticle.class);
    collectingJob.setOutputKeyClass(IntWritable.class);
    collectingJob.setOutputValueClass(Text.class);
    collectingJob.setMapperClass(CollectMap.class);
    collectingJob.setReducerClass(CollectReduce.class);
    collectingJob.setInputFormatClass(TextInputFormat.class);
    collectingJob.setOutputFormatClass(TextOutputFormat.class);
    collectingJob.setMapOutputKeyClass(Text.class);
    collectingJob.setMapOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(collectingJob, new Path("/root/temporary"));
    FileOutputFormat.setOutputPath(collectingJob, new Path("/root/temporary2"));

    if (!collectingJob.waitForCompletion(true)) {
        // Stage 3 reads this stage's output; do not continue on failure.
        System.exit(1);
    }

    // Stage 3: count over the intermediate output of stage 2 and write the final result.
    Job countingJob = new Job(conf, "ContainingArticle");
    countingJob.setJarByClass(ContainingArticle.class);
    countingJob.setOutputKeyClass(Text.class);
    countingJob.setOutputValueClass(IntWritable.class);
    countingJob.setMapperClass(CountMap.class);
    countingJob.setReducerClass(CountReduce.class);
    countingJob.setInputFormatClass(TextInputFormat.class);
    countingJob.setOutputFormatClass(TextOutputFormat.class);
    countingJob.setMapOutputKeyClass(IntWritable.class);
    countingJob.setMapOutputValueClass(Text.class);
    FileInputFormat.addInputPath(countingJob, new Path("/root/temporary2"));
    FileOutputFormat.setOutputPath(countingJob, new Path(args[2]));

    if (!countingJob.waitForCompletion(true)) {
        System.exit(1);
    }
}
From source file:adts.PopularKeywords.java
License:Open Source License
/**
 * Runs the two chained MapReduce stages of the PopularKeywords pipeline.
 *
 * <p>Stage 1 reads the queries ({@code args[0]}) with {@code Map} and the stop words
 * ({@code args[1]}) with {@code StopwordsMap}, reduces with {@code Reduce} and writes to
 * {@code /root/temporary}. Stage 2 reads that directory with {@code ReverseMap}/
 * {@code ReverseReduce}, sorts the {@code LongWritable} map output keys in decreasing order
 * through a single reducer and writes to {@code args[2]}.
 *
 * <p>The sorting stage consumes the output of the first stage, so the process exits with
 * status 1 if either stage reports failure.
 *
 * @param args {@code args[0]} queries input path, {@code args[1]} stop-words input path,
 *             {@code args[2]} final output path
 * @throws Exception if a job cannot be configured, submitted or monitored
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Stage 1: two differently-parsed inputs feeding a single reducer.
    Job job = new Job(conf, "PopularKeywords");
    job.setJarByClass(PopularKeywords.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path queriesInputPath = new Path(args[0]);
    Path stopWordsInputPath = new Path(args[1]);
    MultipleInputs.addInputPath(job, queriesInputPath, TextInputFormat.class, Map.class);
    MultipleInputs.addInputPath(job, stopWordsInputPath, TextInputFormat.class, StopwordsMap.class);
    FileOutputFormat.setOutputPath(job, new Path("/root/temporary"));

    if (!job.waitForCompletion(true)) {
        // The sorting stage reads this stage's output; do not continue on failure.
        System.exit(1);
    }

    // Stage 2: re-key the intermediate output and sort it in decreasing order.
    Job sortingJob = new Job(conf, "PopularKeywords");
    sortingJob.setJarByClass(PopularKeywords.class);
    sortingJob.setOutputKeyClass(Text.class);
    sortingJob.setOutputValueClass(LongWritable.class);
    sortingJob.setMapperClass(ReverseMap.class);
    sortingJob.setReducerClass(ReverseReduce.class);
    sortingJob.setInputFormatClass(TextInputFormat.class);
    sortingJob.setOutputFormatClass(TextOutputFormat.class);
    sortingJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    sortingJob.setMapOutputKeyClass(LongWritable.class);
    sortingJob.setMapOutputValueClass(Text.class);
    FileInputFormat.addInputPath(sortingJob, new Path("/root/temporary"));
    FileOutputFormat.setOutputPath(sortingJob, new Path(args[2]));
    // A single reducer so that all keys pass through one sorted stream.
    sortingJob.setNumReduceTasks(1);

    if (!sortingJob.waitForCompletion(true)) {
        System.exit(1);
    }
}
From source file:AllLab_Skeleton.Lab6.ReduceSideJoin.java
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "ReduceSideJoin"); job.setJarByClass(ReduceSideJoin.class); // Use MultipleInputs to set which input uses what mapper // This will keep parsing of each data set separate from a logical // standpoint // The first two elements of the args array are the two inputs MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, UserJoinMapper.class); MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CommentJoinMapper.class); job.getConfiguration().set("join.type", "leftouter"); //job.setNumReduceTasks(0); job.setReducerClass(UserJoinReducer.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(args[2])); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.waitForCompletion(true);//from w w w . j a v a2 s. c o m }
From source file:Assignment5_P6_StructureToHierarchyPattern.Structure_HierarchyDriver.java
/** * @param args the command line arguments *//*from www.j a v a 2 s . c o m*/ public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Structure to Hierarchy"); job.setJarByClass(Structure_HierarchyDriver.class); // pass file 1 to this mapper in Text format MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, Structure_Hierarchy_Movie_Mapper.class); // pass file 2 to this mapper in Text format MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, Structure_Hierarchy_Tag_Mapper.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setReducerClass(Structure_Hierarchy_Reducer.class); FileOutputFormat.setOutputPath(job, new Path(args[2])); System.exit(job.waitForCompletion(true) ? 0 : 2); }
From source file:cityhub.CityHub.java
@Override public int run(String[] strings) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "ReduceJoin"); job.setJarByClass(CityHub.class); MultipleInputs.addInputPath(job, new Path(strings[0]), TextInputFormat.class, JoinMapper1.class); MultipleInputs.addInputPath(job, new Path(strings[1]), TextInputFormat.class, JoinMapper2.class); job.getConfiguration().set("join.type", "innerjoin"); job.setReducerClass(JoinReducer.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(strings[2])); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); boolean complete = job.waitForCompletion(true); Configuration conf1 = new Configuration(); Job job2 = Job.getInstance(conf1, "chaining"); if (complete) { job2.setJarByClass(CityHub.class); MultipleInputs.addInputPath(job2, new Path(strings[2]), TextInputFormat.class, JoinMapper3.class); MultipleInputs.addInputPath(job2, new Path(strings[3]), TextInputFormat.class, JoinMapper4.class); job2.getConfiguration().set("join.type", "innerjoin"); job2.setReducerClass(JoinReducer1.class); job2.setOutputFormatClass(TextOutputFormat.class); job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(Text.class); TextOutputFormat.setOutputPath(job2, new Path(strings[4])); }//from ww w. java 2s . c om boolean success = job2.waitForCompletion(true); return success ? 0 : 4; }
From source file:com.cmcc.hy.bigdata.weijifen.jobs.hubei.score.ScoreInfoDayJob.java
License:Open Source License
@Override public int run(String[] args) throws Exception { // TODO Auto-generated method stub Configuration conf = ConfigurationUtil.loginAuthentication(args, SEPCIFIC_CONFIG_NAME, getConf()); // ?()/* www . ja v a2 s . co m*/ String statDate = DateUtil.getFilterDate(args); if (statDate == null) { System.exit(1); } conf.set(STAT_DAY, statDate); // ?job Job job = Job.getInstance(conf, JOB_NAME + ":" + statDate); job.setJarByClass(ScoreInfoDayJob.class); String scoreInfoInput = conf.get(SCORE_INFO_INPUT_PATH); Path scoreInfoPath = new Path(scoreInfoInput); String acctPhoneMapInfoInput = conf.get(ACCT_PHONE_MAP_INPUT_PATH); Path accPhoneMapInfoPath = new Path(acctPhoneMapInfoInput); // ? if (FileSystemUtil.exists(scoreInfoPath)) { MultipleInputs.addInputPath(job, scoreInfoPath, SequenceFileInputFormat.class, ScoreInfoDayMapper.class); logger.info("SocreInfoPath is " + scoreInfoInput); } else { logger.error("Path [{}] not exist!", scoreInfoInput); } // ?? // if (FileSystemUtil.exists(accPhoneMapInfoPath)) { // MultipleInputs.addInputPath(job, accPhoneMapInfoPath, TextInputFormat.class, // AcctPhoneMapper.class); // logger.info("AccPhoneMapInfoPath is " + acctPhoneMapInfoInput); // } else { // logger.error("Path [{}] not exist!", acctPhoneMapInfoInput); // } // job job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(ScoreInfo.class); job.setNumReduceTasks(conf.getInt(REDUCE_NUMBER, 40)); job.setOutputFormatClass(NullOutputFormat.class); // TableMapReduceUtil.initTableReducerJob(HBaseTableSchema.USER_INFO_TABLE2, // ScoreInfoDayReducer.class, job); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:com.datasalt.utils.mapred.counter.MapRedCounter.java
License:Apache License
/**
 * Registers one input location on the given job, together with the
 * {@link MapRedCounterMapper} subclass that will process it, and uses that mapper class to
 * locate the job jar.
 *
 * <p>Callers supply their own {@link MapRedCounterMapper} implementation; it is the mapper
 * that emits the groups and items which are subsequently counted.
 *
 * @param job         the job being configured
 * @param location    the input path to add
 * @param inputFormat the input format used to read {@code location}
 * @param mapper      the mapper class applied to {@code location}
 * @throws IOException declared by this method's signature
 */
@SuppressWarnings({ "rawtypes" })
public static void addInput(
        Job job,
        Path location,
        Class<? extends InputFormat> inputFormat,
        Class<? extends MapRedCounterMapper> mapper) throws IOException {
    // Bind this path to its own input format and mapper.
    MultipleInputs.addInputPath(job, location, inputFormat, mapper);
    // The jar containing the caller's mapper becomes the job jar.
    job.setJarByClass(mapper);
}
From source file:com.datasalt.utils.mapred.joiner.MultiJoiner.java
License:Apache License
/** * Adds an input specification. This input won't be associated to a channel as it will be a * {@link MultiJoinMultiChannelMapper}./*from ww w. ja va 2s .co m*/ * * @param location * @param inputFormat * @param mapper * * @throws IOException */ public MultiJoiner addInput(Path location, Class<? extends InputFormat> inputFormat, Class<? extends MultiJoinMultiChannelMapper> mapper) throws IOException { MultipleInputs.addInputPath(getJob(), location, inputFormat, mapper); return this; }
From source file:com.datasalt.utils.mapred.joiner.MultiJoiner.java
License:Apache License
private void addChanneledInputInner(Integer channel, Path location, Class<? extends Object> channelClass, Class<? extends InputFormat> inputFormat, Class<? extends MultiJoinChanneledMapper> mapper) throws IOException { FileSystem fS = location.getFileSystem(getJob().getConfiguration()); if (!location.toString().startsWith("/")) { // relative path location = new Path(fS.getWorkingDirectory(), location); } else {/*from w w w . j a v a 2 s . c o m*/ // absolute path location = new Path(fS.getUri() + location.toString()); } addInOrder(channel + "", MultiJoinChanneledMapper.MULTIJOINER_CHANNELED_CHANNELS, getJob().getConfiguration()); addInOrder(location.toString(), MultiJoinChanneledMapper.MULTIJOINER_CHANNELED_FILES, getJob().getConfiguration()); System.out.println("Adding file " + location + " with mapper " + mapper.getName()); MultipleInputs.addInputPath(getJob(), location, inputFormat, mapper); }
From source file:com.jbw.recommendsystem.filter.FilterMRD.java
/**
 * Configures and runs the filter job.
 *
 * <p>Paths are taken from the configuration: {@code rin} is read by {@code RelationMapper},
 * {@code ain} by {@code AddMapper}, and {@code out} receives the text output of
 * {@code FilterReducer}. All three properties are required; a missing or empty one is
 * reported by name before any job is created.
 *
 * @param strings command-line arguments (not used by this method)
 * @return 0 when the job succeeds, 1 otherwise
 * @throws IllegalArgumentException if {@code rin}, {@code ain} or {@code out} is not set
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path rPath = requiredPath(conf, "rin");
    Path aPath = requiredPath(conf, "ain");
    Path out = requiredPath(conf, "out");

    Job job = Job.getInstance(conf);
    job.setJobName("666");
    job.setJarByClass(FilterMRD.class);

    MultipleInputs.addInputPath(job, rPath, TextInputFormat.class, RelationMapper.class);
    MultipleInputs.addInputPath(job, aPath, TextInputFormat.class, AddMapper.class);
    job.setMapOutputKeyClass(Text.class);

    job.setReducerClass(FilterReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, out);
    job.setOutputKeyClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

/** Reads a required path property, failing with the property name when it is absent or empty. */
private static Path requiredPath(Configuration conf, String key) {
    String value = conf.get(key);
    if (value == null || value.isEmpty()) {
        throw new IllegalArgumentException("Missing required configuration property: " + key);
    }
    return new Path(value);
}