Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

This page collects example usages of org.apache.hadoop.mapreduce Job setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 
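
Note the declared IllegalStateException: the combiner may only be set while the job is still being defined, and calling setCombinerClass after the job has been submitted throws. A short sketch of both cases, assuming the stock word-count classes:

Job job = Job.getInstance(conf, "word count");
job.setCombinerClass(IntSumReducer.class); // fine: the job is still in the DEFINE state
job.submit();
// job.setCombinerClass(IntSumReducer.class); // would now throw IllegalStateException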

Document

Set the combiner class for the job. The combiner is an optional, reducer-like class that runs over map output before it is shuffled; its input and output key/value types must both match the map output types, and the operation it performs must be commutative and associative, since Hadoop may apply it zero or more times.
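
Most of the examples below reuse the job's reducer as the combiner, which is valid whenever the reducer's input and output key/value types are identical and its operation is associative, as with the word-count IntSumReducer. A minimal standalone sketch follows; the class name SumCombiner is illustrative and not taken from any example on this page:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// A combiner is declared exactly like a Reducer; all four type parameters
// must match the map output key/value types (here Text and IntWritable).
public class SumCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        result.set(sum); // a partial sum is safe because addition is commutative and associative
        context.write(key, result);
    }
}

It is registered with job.setCombinerClass(SumCombiner.class); alongside the mapper and reducer, as in the examples below.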

Usage

From source file:com.neu.cs6240.AvgTimeToAnswer.AvgTimeToAnsPerHashTag.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ",");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: AvgTimeToAnsPerHashTag <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "AvgTimeToAnsPerHashTag");
    job.setJarByClass(AvgTimeToAnsPerHashTag.class);
    job.setMapperClass(AvgTimeToAnsPerHashTagMapper.class);
    job.setReducerClass(AvgTimeToAnsPerHashTagReducer.class);
    job.setCombinerClass(AvgTimeToAnsPerHashTagReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setPartitionerClass(AvgTimeToAnsPerHashTagPartitioner.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
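
Note that this example registers its reducer as the combiner. That only preserves the final result when the reduce operation is commutative and associative; whether that holds here depends on what AvgTimeToAnsPerHashTagReducer emits, since a reducer that emitted averages directly would need a separate combiner emitting partial sums and counts.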

From source file:com.neusoft.hbase.test.hadoop.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.niuwa.hadoop.chubao.job.IndicatorJob002.java

License:Apache License

@Override
public void setJobSpecialInfo(Job job, Configuration conf, RunParams params, Map<String, Path> tempPaths)
        throws Exception {

    job.setMapperClass(IndicatorJob002.UserIdMapper.class);
    job.setCombinerClass(IndicatorJob002.CombinerSumReducer.class);
    job.setReducerClass(IndicatorJob002.IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, ChubaoJobConfig.getInputPath(ChubaoJobConfig.INPUT_CONTACT));
    // write this job's output to its registered temp path
    FileOutputFormat.setOutputPath(job, tempPaths.get(IndicatorJob002.class.getName()));
    // remove any existing output at that path
    HadoopUtil.deleteOutputFile(tempPaths.get(IndicatorJob002.class.getName()));

}

From source file:com.niuwa.hadoop.jobs.sample.JobControlTest.java

License:Apache License

public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();
    String path = "hdfs://ns1:9000/user/root";
    if (args.length != 0) {
        path = args[0];
    }
    String[] args_1 = new String[] { path + "/chubao/input/contact",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total_next" };
    String[] otherArgs = new GenericOptionsParser(conf, args_1).getRemainingArgs();
    // first job: word count
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(JobControlTest.class);
    job.setMapperClass(UserIdMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    // clear the output path before running
    deleteOutputFile(otherArgs[1], otherArgs[0]);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // second job, which consumes the first job's output
    Job job2 = Job.getInstance(conf, "job2");
    job2.setJarByClass(JobControlTest.class);
    job2.setMapperClass(AddDateMapper.class);
    job2.setReducerClass(Job2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    // clear job2's output path before running
    deleteOutputFile(otherArgs[2], otherArgs[1]);
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));

    // wrap each Job in a ControlledJob
    ControlledJob controlledJob1 = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());

    // job2 must wait for job1 to complete
    controlledJob2.addDependingJob(controlledJob1);

    // group both jobs under a JobControl
    JobControl jobControl = new JobControl("JobControlDemoGroup");
    jobControl.addJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    // JobControl implements Runnable; drive it from a background thread
    Thread jobControlThread = new Thread(jobControl);
    jobControlThread.start();
    while (true) {
        if (jobControl.allFinished()) {
            System.out.println(jobControl.getSuccessfulJobList());
            jobControl.stop();
            break;
        }
        Thread.sleep(500); // poll rather than busy-spin on allFinished()
    }
}

From source file:com.niuwa.hadoop.jobs.sample.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();
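    // NOTE: the command-line args are overridden by hard-coded HDFS paths below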
    args = new String[] { "hdfs://192.168.101.219:9000/user/root/input",
            "hdfs://192.168.101.219:9000/user/root/output/count" + new Date().getTime() };
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.obomprogramador.bigdata.sentiment.sentimentanalysis.Sentiment.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 3) {
        System.err.println("Usage: Sentiment <sentiwordnet file> <in> <out>");
        System.exit(2);
    }
    System.out.println("Param1: " + otherArgs[0] + " Param2: " + otherArgs[1] + " Param3: " + otherArgs[2]);
    conf.set("sentwordnetfile", otherArgs[0]);
    Job job = Job.getInstance(conf);
    job.setJarByClass(Sentiment.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.panguso.lc.analysis.format.Logcenter.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    context = new ClassPathXmlApplicationContext("applicationContext.xml");
    Properties prop = context.getBean("configProperties", Properties.class);
    // String time = new DateTime().toString("yyyyMMddHH");

    // hadoop.lib=/application/format/lib/
    // hadoop.conf=/application/format/conf/
    // hadoop.src=/log/src/
    // hadoop.dest=/log/dest/
    // hadoop.archive=/log/archive/
    libPath = prop.getProperty("hadoop.lib");
    confPath = prop.getProperty("hadoop.conf");
    srcPath = prop.getProperty("hadoop.src");
    destPath = prop.getProperty("hadoop.dest");
    archivePath = prop.getProperty("hadoop.archive");
    Configuration conf = getConf();
    logger.info("libPath=" + libPath);
    logger.info("confPath=" + confPath);
    logger.info("srcPath=" + srcPath);
    logger.info("destPath=" + destPath);
    logger.info("archivePath=" + archivePath);

    FileSystem fs = FileSystem.get(conf);
    // add every jar under libPath to the job classpath
    FileStatus[] fJars = fs.listStatus(new Path(libPath));
    for (FileStatus fileStatus : fJars) {
        String jar = libPath + fileStatus.getPath().getName();
        DistributedCache.addFileToClassPath(new Path(jar), conf, FileSystem.get(conf));
    }
    // likewise add every config file under confPath to the job classpath
    FileStatus[] fProp = fs.listStatus(new Path(confPath));
    for (FileStatus fileStatus : fProp) {
        DistributedCache.addArchiveToClassPath(new Path(confPath + fileStatus.getPath().getName()), conf,
                FileSystem.get(conf));
    }
    FileStatus[] fDirs = fs.listStatus(new Path(srcPath));
    if (fDirs != null && fDirs.length > 0) {
        for (FileStatus file : fDirs) {
            // each subdirectory under srcPath is one time-bucketed batch
            String currentTime = file.getPath().getName();
            String srcPathWithTime = srcPath + currentTime + "/";
            String destPathWithTime = destPath + currentTime + "/";
            String archPathWithTime = archivePath + currentTime + "/";
            // skip batches that were already processed successfully
            if (analysisService.isSuccessful(currentTime)) {
                continue;
            }

            // set up and run the format job for this batch

            // delete any previous output for this batch
            fs.delete(new Path(destPathWithTime), true);

            // optionally, verify the input path exists before submitting:
            // if (!fs.exists(new Path(srcPathWithTime))) {
            // logger.warn("outPath does not exist,inputPath=" +
            // srcPathWithTime);
            // analysisService.saveFailureJob(job.getJobName(),
            // currentTime);
            // return -1;
            // }
            // normalize the classpath property: replace ';' with ':'
            Job job = new Job(conf);
            String jars = job.getConfiguration().get("mapred.job.classpath.files");
            job.getConfiguration().set("mapred.job.classpath.files", jars.replace(";", ":"));
            logger.info("current dir=" + currentTime);
            job.setJobName("format_" + currentTime);

            job.setJarByClass(Logcenter.class);
            job.setMapperClass(FormatAnalysisMapper.class);
            job.setReducerClass(FormatAnalysisReducer.class);
            job.setCombinerClass(FormatAnalysisReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            // job.setNumReduceTasks(0);
            FileInputFormat.addInputPath(job, new Path(srcPathWithTime));
            FileOutputFormat.setOutputPath(job, new Path(destPathWithTime));

            // run the job and check the result
            boolean result = false;
            try {
                result = job.waitForCompletion(true);
            } catch (FileAlreadyExistsException e) {
                logger.warn(e.getMessage(), e);
            }
            if (!result) {
                logger.warn("job execute failure!");
                analysisService.saveFailureJob(job.getJobName(), currentTime);
                continue;
                // return -1;
            }

            // archive the processed input, replacing any earlier archive
            fs.delete(new Path(archPathWithTime), true);
            fs.rename(new Path(srcPathWithTime), new Path(archPathWithTime));
            analysisService.saveSuccessJob(job.getJobName(), currentTime);
        }
    }

    FileSystem.closeAll();
    return 0;
}

From source file:com.paperbook.test.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    //    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    //    if (otherArgs.length != 2) {
    //      System.err.println("Usage: wordcount <in> <out>");
    //      System.exit(2);
    //    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // These paths are on the local file system, not HDFS
    FileInputFormat.addInputPath(job, new Path("resources/words.txt"));
    FileOutputFormat.setOutputPath(job, new Path("output"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.phantom.hadoop.examples.DBCountPageView.java

License:Apache License

@Override
// Usage DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {

    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;

    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }

    initialize(driverClassName, url);
    Configuration conf = getConf();

    DBConfiguration.configureDB(conf, driverClassName, url);

    Job job = new Job(conf);

    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);

    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);
    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}

From source file:com.phantom.hadoop.examples.Grep.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4)
        conf.set(RegexMapper.GROUP, args[3]);

    Job grepJob = new Job(conf);

    try {

        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        grepJob.waitForCompletion(true);

        Job sortJob = new Job(conf);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
                LongWritable.DecreasingComparator.class);

        sortJob.waitForCompletion(true);
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
    return 0;
}