Example usage for org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath

List of usage examples for org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath.

Prototype

@SuppressWarnings("unchecked")
public static void addInputPath(Job job, Path path, Class<? extends InputFormat> inputFormatClass,
        Class<? extends Mapper> mapperClass) 

Source Link

Document

Add a Path with a custom InputFormat and Mapper to the list of inputs for the map-reduce job.

Usage

From source file:adts.ContainingArticle.java

License:Open Source License

/**
 * Runs the three chained "ContainingArticle" MapReduce jobs.
 *
 * <ol>
 *   <li>Join stage: reads the queries input with {@code QueriesMap} and the articles input with
 *       {@code ArticlesMap}, reduces with {@code Reduce}, and writes to {@code /root/temporary}.</li>
 *   <li>Collecting stage: {@code CollectMap}/{@code CollectReduce} over {@code /root/temporary},
 *       writing to {@code /root/temporary2}.</li>
 *   <li>Counting stage: {@code CountMap}/{@code CountReduce} over {@code /root/temporary2},
 *       writing to the final output path.</li>
 * </ol>
 *
 * <p>Each stage consumes the previous stage's output, so the pipeline stops and the process exits
 * with status 1 as soon as a stage reports failure.
 *
 * @param args {@code args[0]} queries input path, {@code args[1]} articles input path,
 *             {@code args[2]} final output path
 * @throws Exception if job setup or submission fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Stage 1: one mapper per input source, joined in a single reducer.
    Job job = new Job(conf, "ContainingArticle");
    job.setJarByClass(ContainingArticle.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path queriesInputPath = new Path(args[0]);
    Path articlesInputPath = new Path(args[1]);
    MultipleInputs.addInputPath(job, queriesInputPath, TextInputFormat.class, QueriesMap.class);
    MultipleInputs.addInputPath(job, articlesInputPath, TextInputFormat.class, ArticlesMap.class);

    FileOutputFormat.setOutputPath(job, new Path("/root/temporary"));
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // The next stage reads /root/temporary; do not continue on partial or missing output.
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }

    // Stage 2: collect the intermediate records produced by stage 1.
    Job collectingJob = new Job(conf, "ContainingArticle");
    collectingJob.setJarByClass(ContainingArticle.class);

    collectingJob.setOutputKeyClass(IntWritable.class);
    collectingJob.setOutputValueClass(Text.class);

    collectingJob.setMapperClass(CollectMap.class);
    collectingJob.setReducerClass(CollectReduce.class);

    collectingJob.setInputFormatClass(TextInputFormat.class);
    collectingJob.setOutputFormatClass(TextOutputFormat.class);
    collectingJob.setMapOutputKeyClass(Text.class);
    collectingJob.setMapOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(collectingJob, new Path("/root/temporary"));
    FileOutputFormat.setOutputPath(collectingJob, new Path("/root/temporary2"));

    // The next stage reads /root/temporary2; stop here if it was not produced successfully.
    if (!collectingJob.waitForCompletion(true)) {
        System.exit(1);
    }

    // Stage 3: produce the final counts in the caller-supplied output directory.
    Job countingJob = new Job(conf, "ContainingArticle");
    countingJob.setJarByClass(ContainingArticle.class);

    countingJob.setOutputKeyClass(Text.class);
    countingJob.setOutputValueClass(IntWritable.class);

    countingJob.setMapperClass(CountMap.class);
    countingJob.setReducerClass(CountReduce.class);

    countingJob.setInputFormatClass(TextInputFormat.class);
    countingJob.setOutputFormatClass(TextOutputFormat.class);
    countingJob.setMapOutputKeyClass(IntWritable.class);
    countingJob.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(countingJob, new Path("/root/temporary2"));
    FileOutputFormat.setOutputPath(countingJob, new Path(args[2]));

    // Report a failed final stage through the process exit status.
    if (!countingJob.waitForCompletion(true)) {
        System.exit(1);
    }
}

From source file:adts.PopularKeywords.java

License:Open Source License

/**
 * Runs the two chained "PopularKeywords" MapReduce jobs.
 *
 * <ol>
 *   <li>Counting stage: reads the queries input with {@code Map} and the stop-words input with
 *       {@code StopwordsMap}, reduces with {@code Reduce}, and writes to {@code /root/temporary}.</li>
 *   <li>Sorting stage: {@code ReverseMap}/{@code ReverseReduce} over {@code /root/temporary} with
 *       {@code LongWritable.DecreasingComparator} as the sort comparator and a single reduce task,
 *       writing to the final output path.</li>
 * </ol>
 *
 * <p>The sorting stage consumes the counting stage's output, so the process exits with status 1
 * as soon as either stage reports failure.
 *
 * @param args {@code args[0]} queries input path, {@code args[1]} stop-words input path,
 *             {@code args[2]} final output path
 * @throws Exception if job setup or submission fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Stage 1: one mapper per input source, combined in a single reducer.
    Job job = new Job(conf, "PopularKeywords");
    job.setJarByClass(PopularKeywords.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path queriesInputPath = new Path(args[0]);
    Path stopWordsInputPath = new Path(args[1]);
    MultipleInputs.addInputPath(job, queriesInputPath, TextInputFormat.class, Map.class);
    MultipleInputs.addInputPath(job, stopWordsInputPath, TextInputFormat.class, StopwordsMap.class);

    FileOutputFormat.setOutputPath(job, new Path("/root/temporary"));

    // The sorting stage reads /root/temporary; do not continue on partial or missing output.
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }

    // Stage 2: re-key the stage 1 output and sort it in decreasing key order.
    Job sortingJob = new Job(conf, "PopularKeywords");
    sortingJob.setJarByClass(PopularKeywords.class);

    sortingJob.setOutputKeyClass(Text.class);
    sortingJob.setOutputValueClass(LongWritable.class);

    sortingJob.setMapperClass(ReverseMap.class);
    sortingJob.setReducerClass(ReverseReduce.class);

    sortingJob.setInputFormatClass(TextInputFormat.class);
    sortingJob.setOutputFormatClass(TextOutputFormat.class);
    sortingJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    sortingJob.setMapOutputKeyClass(LongWritable.class);
    sortingJob.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(sortingJob, new Path("/root/temporary"));
    FileOutputFormat.setOutputPath(sortingJob, new Path(args[2]));

    // A single reduce task so that all keys pass through one sorted reducer.
    sortingJob.setNumReduceTasks(1);

    // Report a failed final stage through the process exit status.
    if (!sortingJob.waitForCompletion(true)) {
        System.exit(1);
    }
}

From source file:AllLab_Skeleton.Lab6.ReduceSideJoin.java

/**
 * Configures and runs the "ReduceSideJoin" job.
 *
 * <p>Two text inputs are registered through {@code MultipleInputs}, each with its own mapper
 * ({@code UserJoinMapper} and {@code CommentJoinMapper}), and both feed {@code UserJoinReducer}.
 * The configuration key {@code join.type} is set to {@code "leftouter"}; it is presumably read by
 * the reducer to select the join semantics (the reducer is not visible here).
 *
 * <p>The process exits with status 1 when the job reports failure, so that callers and scripts
 * can detect it.
 *
 * @param args {@code args[0]} first input path (handled by {@code UserJoinMapper}),
 *             {@code args[1]} second input path (handled by {@code CommentJoinMapper}),
 *             {@code args[2]} output path
 * @throws Exception if job setup or submission fails
 */
public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf, "ReduceSideJoin");
    job.setJarByClass(ReduceSideJoin.class);

    // Use MultipleInputs to set which input uses what mapper.
    // This keeps the parsing of each data set separate from a logical standpoint.
    // The first two elements of the args array are the two inputs.
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, UserJoinMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CommentJoinMapper.class);
    job.getConfiguration().set("join.type", "leftouter");
    job.setReducerClass(UserJoinReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(args[2]));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Surface a failed job through the process exit status instead of silently returning.
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }
}

From source file:Assignment5_P6_StructureToHierarchyPattern.Structure_HierarchyDriver.java

/**
 * Driver for the "Structure to Hierarchy" job: two text inputs, each with its own mapper,
 * joined by a single reducer.
 *
 * @param args the command line arguments: {@code args[0]} is the input handled by
 *             {@code Structure_Hierarchy_Movie_Mapper}, {@code args[1]} is the input handled by
 *             {@code Structure_Hierarchy_Tag_Mapper}, and {@code args[2]} is the output path
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job hierarchyJob = Job.getInstance(new Configuration(), "Structure to Hierarchy");
    hierarchyJob.setJarByClass(Structure_HierarchyDriver.class);

    // First file goes to the movie mapper, read as text.
    Path movieInput = new Path(args[0]);
    MultipleInputs.addInputPath(hierarchyJob, movieInput, TextInputFormat.class,
            Structure_Hierarchy_Movie_Mapper.class);

    // Second file goes to the tag mapper, read as text.
    Path tagInput = new Path(args[1]);
    MultipleInputs.addInputPath(hierarchyJob, tagInput, TextInputFormat.class,
            Structure_Hierarchy_Tag_Mapper.class);

    hierarchyJob.setReducerClass(Structure_Hierarchy_Reducer.class);
    hierarchyJob.setOutputKeyClass(Text.class);
    hierarchyJob.setOutputValueClass(Text.class);

    Path outputDir = new Path(args[2]);
    FileOutputFormat.setOutputPath(hierarchyJob, outputDir);

    // Exit status: 0 when the job succeeds, 2 otherwise.
    boolean succeeded = hierarchyJob.waitForCompletion(true);
    if (succeeded) {
        System.exit(0);
    } else {
        System.exit(2);
    }
}

From source file:cityhub.CityHub.java

/**
 * Runs two chained join jobs.
 *
 * <p>The first job ("ReduceJoin") joins {@code strings[0]} and {@code strings[1]} using
 * {@code JoinMapper1}, {@code JoinMapper2} and {@code JoinReducer}, writing to {@code strings[2]}.
 * The second job ("chaining") joins that output with {@code strings[3]} using
 * {@code JoinMapper3}, {@code JoinMapper4} and {@code JoinReducer1}, writing to {@code strings[4]}.
 * Both jobs set the configuration key {@code join.type} to {@code "innerjoin"}.
 *
 * <p>The second job depends on the first job's output, so it is only created and submitted when
 * the first job succeeds.
 *
 * @param strings {@code [0]} and {@code [1]} inputs of the first join, {@code [2]} output of the
 *                first join (and input of the second), {@code [3]} second input of the second
 *                join, {@code [4]} final output path
 * @return 0 when both jobs succeed, 4 when either job fails
 * @throws Exception if job setup or submission fails
 */
@Override
public int run(String[] strings) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "ReduceJoin");
    job.setJarByClass(CityHub.class);

    MultipleInputs.addInputPath(job, new Path(strings[0]), TextInputFormat.class, JoinMapper1.class);
    MultipleInputs.addInputPath(job, new Path(strings[1]), TextInputFormat.class, JoinMapper2.class);
    job.getConfiguration().set("join.type", "innerjoin");

    job.setReducerClass(JoinReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(strings[2]));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Never submit the second job unconfigured: without the first job's output there is
    // nothing to join, so report failure with the same code used for a failed second job.
    if (!job.waitForCompletion(true)) {
        return 4;
    }

    Configuration conf1 = new Configuration();
    Job job2 = Job.getInstance(conf1, "chaining");
    job2.setJarByClass(CityHub.class);

    // strings[2] is the output directory of the first job.
    MultipleInputs.addInputPath(job2, new Path(strings[2]), TextInputFormat.class, JoinMapper3.class);
    MultipleInputs.addInputPath(job2, new Path(strings[3]), TextInputFormat.class, JoinMapper4.class);
    job2.getConfiguration().set("join.type", "innerjoin");

    job2.setReducerClass(JoinReducer1.class);
    job2.setOutputFormatClass(TextOutputFormat.class);

    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    TextOutputFormat.setOutputPath(job2, new Path(strings[4]));

    boolean success = job2.waitForCompletion(true);
    return success ? 0 : 4;
}

From source file:com.cmcc.hy.bigdata.weijifen.jobs.hubei.score.ScoreInfoDayJob.java

License:Open Source License

/**
 * Configures and runs the daily score-info job for one statistics date.
 *
 * <p>The statistics date is obtained from {@code DateUtil.getFilterDate(args)} and stored in the
 * configuration under {@code STAT_DAY}. The score-info input path is read from the configuration
 * key {@code SCORE_INFO_INPUT_PATH} and registered with {@code ScoreInfoDayMapper} as a sequence
 * file input. The job uses {@code NullOutputFormat} and does not set a reducer class here.
 *
 * <p>Failures are reported through the return value rather than by terminating the JVM, so the
 * caller decides what to do with the status.
 *
 * @param args command line arguments, forwarded to {@code ConfigurationUtil.loginAuthentication}
 *             and {@code DateUtil.getFilterDate}
 * @return 0 when the job succeeds; 1 when no statistics date could be determined, when the
 *         score-info input path does not exist, or when the job fails
 * @throws Exception if configuration, job setup or submission fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = ConfigurationUtil.loginAuthentication(args, SEPCIFIC_CONFIG_NAME, getConf());

    // Determine the statistics date; without it the job cannot be parameterized.
    String statDate = DateUtil.getFilterDate(args);
    if (statDate == null) {
        return 1;
    }

    conf.set(STAT_DAY, statDate);

    // Create the job, named after the statistics date.
    Job job = Job.getInstance(conf, JOB_NAME + ":" + statDate);
    job.setJarByClass(ScoreInfoDayJob.class);
    String scoreInfoInput = conf.get(SCORE_INFO_INPUT_PATH);
    Path scoreInfoPath = new Path(scoreInfoInput);

    // Register the score-info input. It is the only input of this job, so a missing path
    // is reported as a failure instead of submitting a job that has nothing to read.
    if (!FileSystemUtil.exists(scoreInfoPath)) {
        logger.error("Path [{}] not exist!", scoreInfoInput);
        return 1;
    }
    MultipleInputs.addInputPath(job, scoreInfoPath, SequenceFileInputFormat.class,
            ScoreInfoDayMapper.class);
    logger.info("SocreInfoPath is {}", scoreInfoInput);

    // NOTE(review): the original also carried a disabled second input (key
    // ACCT_PHONE_MAP_INPUT_PATH, TextInputFormat, AcctPhoneMapper). It stays disabled, and the
    // unused Path built from that key was dropped because it throws when the key is unset.

    // Map output types, reducer count (default 40) and output format.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(ScoreInfo.class);
    job.setNumReduceTasks(conf.getInt(REDUCE_NUMBER, 40));
    job.setOutputFormatClass(NullOutputFormat.class);

    // NOTE(review): the original had a disabled call to
    // TableMapReduceUtil.initTableReducerJob(HBaseTableSchema.USER_INFO_TABLE2,
    // ScoreInfoDayReducer.class, job); it stays disabled - confirm whether it should be restored.

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.datasalt.utils.mapred.counter.MapRedCounter.java

License:Apache License

/**
 * Adds an input file and {@link MapRedCounterMapper} to be processed for emit groups and items that then will be
 * counted. Remember you have to implement your own {@link MapRedCounterMapper} to be provided here.
 *
 * <p>The input is registered through {@code MultipleInputs.addInputPath}, and the job's jar is then set from
 * the given mapper class.
 *
 * @param job the job to add the input to
 * @param location path of the input to add
 * @param inputFormat input format class used to read {@code location}
 * @param mapper mapper class applied to this input; also used to locate the job jar
 * @throws IOException declared by the signature; no call in this body is visibly marked as its source
 */
@SuppressWarnings({ "rawtypes" })
public static void addInput(Job job, Path location, Class<? extends InputFormat> inputFormat,
        Class<? extends MapRedCounterMapper> mapper) throws IOException {

    // Bind this path to its own input format and mapper.
    MultipleInputs.addInputPath(job, location, inputFormat, mapper);
    // Use the mapper class to identify the jar that is set on the job.
    job.setJarByClass(mapper);
}

From source file:com.datasalt.utils.mapred.joiner.MultiJoiner.java

License:Apache License

/**
 * Adds an input specification. This input won't be associated to a channel as it will be a
 * {@link MultiJoinMultiChannelMapper}.
 *
 * <p>The input is registered on the job returned by {@code getJob()} through
 * {@code MultipleInputs.addInputPath}.
 *
 * @param location path of the input to add
 * @param inputFormat input format class used to read {@code location}
 * @param mapper mapper class applied to this input
 * @return this joiner, so that calls can be chained
 * @throws IOException declared by the signature; no call in this body is visibly marked as its source
 */
public MultiJoiner addInput(Path location, Class<? extends InputFormat> inputFormat,
        Class<? extends MultiJoinMultiChannelMapper> mapper) throws IOException {
    MultipleInputs.addInputPath(getJob(), location, inputFormat, mapper);
    return this;
}

From source file:com.datasalt.utils.mapred.joiner.MultiJoiner.java

License:Apache License

/**
 * Registers a channeled input on the job returned by {@code getJob()}.
 *
 * <p>The location is first turned into a full path: a location whose string form starts with
 * {@code "/"} is prefixed with the file system URI, any other location is resolved against the
 * file system's working directory. The channel and the resulting path are then recorded in the
 * job configuration through {@code addInOrder}, and the path is added to the job with
 * {@code MultipleInputs.addInputPath}.
 *
 * @param channel channel number recorded for this input
 * @param location input path, absolute or relative
 * @param channelClass not used by this method
 * @param inputFormat input format class used to read the input
 * @param mapper mapper class applied to this input
 * @throws IOException if the file system for {@code location} cannot be obtained
 */
private void addChanneledInputInner(Integer channel, Path location, Class<? extends Object> channelClass,
        Class<? extends InputFormat> inputFormat, Class<? extends MultiJoinChanneledMapper> mapper)
        throws IOException {

    FileSystem fileSystem = location.getFileSystem(getJob().getConfiguration());
    String rawLocation = location.toString();

    // Absolute paths get the file system URI prepended; relative ones are resolved
    // against the working directory.
    Path resolved = rawLocation.startsWith("/")
            ? new Path(fileSystem.getUri() + rawLocation)
            : new Path(fileSystem.getWorkingDirectory(), location);
    String resolvedName = resolved.toString();

    // Channel and file are recorded in matching order.
    addInOrder(String.valueOf(channel), MultiJoinChanneledMapper.MULTIJOINER_CHANNELED_CHANNELS,
            getJob().getConfiguration());
    addInOrder(resolvedName, MultiJoinChanneledMapper.MULTIJOINER_CHANNELED_FILES,
            getJob().getConfiguration());

    System.out.println("Adding file " + resolvedName + " with mapper " + mapper.getName());
    MultipleInputs.addInputPath(getJob(), resolved, inputFormat, mapper);
}

From source file:com.jbw.recommendsystem.filter.FilterMRD.java

/**
 * Configures and runs the filter job.
 *
 * <p>Input and output locations come from the configuration rather than from the arguments:
 * {@code "rin"} is the input handled by {@code RelationMapper}, {@code "ain"} is the input
 * handled by {@code AddMapper}, and {@code "out"} is the output path. Both inputs are reduced by
 * {@code FilterReducer}.
 *
 * @param strings command line arguments; not used by this method
 * @return 0 when the job succeeds, 1 otherwise
 * @throws Exception if job setup or submission fails
 */
@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path relationInput = new Path(conf.get("rin"));
    Path addInput = new Path(conf.get("ain"));
    Path outputDir = new Path(conf.get("out"));

    Job filterJob = Job.getInstance(conf);
    filterJob.setJobName("666");
    filterJob.setJarByClass(FilterMRD.class);

    // One mapper per input source.
    MultipleInputs.addInputPath(filterJob, relationInput, TextInputFormat.class, RelationMapper.class);
    MultipleInputs.addInputPath(filterJob, addInput, TextInputFormat.class, AddMapper.class);

    // Key types for the map output and the final output.
    filterJob.setMapOutputKeyClass(Text.class);
    filterJob.setOutputKeyClass(Text.class);

    filterJob.setReducerClass(FilterReducer.class);
    filterJob.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(filterJob, outputDir);

    boolean succeeded = filterJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}