Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException

Source Link

Document

Set the combiner class for the job.

Usage

From source file:com.neu.cs6240.AvgTimeToAnswer.AvgTimeToAnsPerHashTag.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ",");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: AvgTimeToAnsPerHashTag <in> <out>");
        System.exit(2);/*  ww  w . j  ava  2  s .com*/
    }
    Job job = new Job(conf, "AvgTimeToAnsPerHashTag");
    job.setJarByClass(AvgTimeToAnsPerHashTag.class);
    job.setMapperClass(AvgTimeToAnsPerHashTagMapper.class);
    job.setReducerClass(AvgTimeToAnsPerHashTagReducer.class);
    job.setCombinerClass(AvgTimeToAnsPerHashTagReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setPartitionerClass(AvgTimeToAnsPerHashTagPartitioner.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.neusoft.hbase.test.hadoop.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);//from   w ww.j  a  va  2s .c  o  m
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.niuwa.hadoop.chubao.job.IndicatorJob002.java

License:Apache License

@Override
public void setJobSpecialInfo(Job job, Configuration conf, RunParams params, Map<String, Path> tempPaths)
        throws Exception {

    job.setMapperClass(IndicatorJob002.UserIdMapper.class);
    job.setCombinerClass(IndicatorJob002.CombinerSumReducer.class);
    job.setReducerClass(IndicatorJob002.IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // // w w w  .j a va  2  s .c  o  m
    FileInputFormat.addInputPath(job, ChubaoJobConfig.getInputPath(ChubaoJobConfig.INPUT_CONTACT));
    // 
    FileOutputFormat.setOutputPath(job, tempPaths.get(IndicatorJob002.class.getName()));
    // 
    HadoopUtil.deleteOutputFile(tempPaths.get(IndicatorJob002.class.getName()));

}

From source file:com.niuwa.hadoop.jobs.sample.JobControlTest.java

License:Apache License

public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();//from ww w  .j av  a2s  .c o  m
    Configuration conf = new Configuration();
    String path = "hdfs://ns1:9000/user/root";
    if (args.length != 0) {
        path = args[0];
    }
    String[] args_1 = new String[] { path + "/chubao/input/contact",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total_next" };
    String[] otherArgs = new GenericOptionsParser(conf, args_1).getRemainingArgs();
    // job
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(JobControlTest.class);
    job.setMapperClass(UserIdMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    // 
    deleteOutputFile(otherArgs[1], otherArgs[0]);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // job
    Job job2 = Job.getInstance(conf, "job2");
    job2.setJarByClass(JobControlTest.class);
    job2.setMapperClass(AddDateMapper.class);
    job2.setReducerClass(Job2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    // 
    deleteOutputFile(otherArgs[2], otherArgs[1]);
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));

    // ControlledJob
    ControlledJob controlledJob1 = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());

    // ?
    controlledJob2.addDependingJob(controlledJob1);

    // JobControl
    JobControl jobControl = new JobControl("JobControlDemoGroup");
    jobControl.addJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    // ?
    Thread jobControlThread = new Thread(jobControl);
    jobControlThread.start();
    while (true) {
        if (jobControl.allFinished()) {
            System.out.println(jobControl.getSuccessfulJobList());
            jobControl.stop();
            break;
        }
    }
}

From source file:com.niuwa.hadoop.jobs.sample.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();//  w  w  w.j  av a 2s .  co m
    Configuration conf = new Configuration();
    args = new String[] { "hdfs://192.168.101.219:9000/user/root/input",
            "hdfs://192.168.101.219:9000/user/root/output/count" + new Date().getTime() };
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.obomprogramador.bigdata.sentiment.sentimentanalysis.Sentiment.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 3) {
        System.err.println("Usage: Sentiment <sentiwordnet file> <in> <out>");
        System.exit(2);//from   ww w.  j a va 2  s  .  c  om
    }
    System.out.println("Param1: " + otherArgs[0] + " Param2: " + otherArgs[1] + " Param3: " + otherArgs[2]);
    conf.set("sentwordnetfile", otherArgs[0]);
    Job job = Job.getInstance(conf);
    job.setJarByClass(Sentiment.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.panguso.lc.analysis.format.Logcenter.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    context = new ClassPathXmlApplicationContext("applicationContext.xml");
    Properties prop = context.getBean("configProperties", Properties.class);
    // ??// w  w  w  . ja  v a2 s . co  m
    // String time = new DateTime().toString("yyyyMMddHH");

    // hadoop.lib=/application/format/lib/
    // hadoop.conf=/application/format/conf/
    // hadoop.src=/log/src/
    // hadoop.dest=/log/dest/
    // hadoop.archive=/log/archive/
    libPath = prop.getProperty("hadoop.lib");
    confPath = prop.getProperty("hadoop.conf");
    srcPath = prop.getProperty("hadoop.src");
    destPath = prop.getProperty("hadoop.dest");
    archivePath = prop.getProperty("hadoop.archive");
    Configuration conf = getConf();
    logger.info("libPath=" + libPath);
    logger.info("confPath=" + confPath);
    logger.info("srcPath=" + srcPath);
    logger.info("destPath=" + destPath);
    logger.info("archivePath=" + archivePath);

    FileSystem fs = FileSystem.get(conf);
    // --jar
    FileStatus[] fJars = fs.listStatus(new Path(libPath));
    for (FileStatus fileStatus : fJars) {
        String jar = libPath + fileStatus.getPath().getName();
        DistributedCache.addFileToClassPath(new Path(jar), conf, FileSystem.get(conf));
    }
    // --?
    FileStatus[] fProp = fs.listStatus(new Path(confPath));
    for (FileStatus fileStatus : fProp) {
        DistributedCache.addArchiveToClassPath(new Path(confPath + fileStatus.getPath().getName()), conf,
                FileSystem.get(conf));
    }
    FileStatus[] fDirs = fs.listStatus(new Path(srcPath));
    if (fDirs != null && fDirs.length > 0) {
        for (FileStatus file : fDirs) {
            // dir
            String currentTime = file.getPath().getName();
            String srcPathWithTime = srcPath + currentTime + "/";
            String destPathWithTime = destPath + currentTime + "/";
            String archPathWithTime = archivePath + currentTime + "/";
            // ??
            if (analysisService.isSuccessful(currentTime)) {
                continue;
            }

            // ??job?

            // 
            fs.delete(new Path(destPathWithTime), true);

            // ?
            // if (!fs.exists(new Path(srcPathWithTime))) {
            // logger.warn("outPath does not exist,inputPath=" +
            // srcPathWithTime);
            // analysisService.saveFailureJob(job.getJobName(),
            // currentTime);
            // return -1;
            // }
            // ?classpath";"":"
            Job job = new Job(conf);
            String jars = job.getConfiguration().get("mapred.job.classpath.files");
            job.getConfiguration().set("mapred.job.classpath.files", jars.replace(";", ":"));
            logger.info("current dir=" + currentTime);
            job.setJobName("format_" + currentTime);

            job.setJarByClass(Logcenter.class);
            job.setMapperClass(FormatAnalysisMapper.class);
            job.setReducerClass(FormatAnalysisReducer.class);
            job.setCombinerClass(FormatAnalysisReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            // job.setNumReduceTasks(0);
            // //??reduce????namenode
            FileInputFormat.addInputPath(job, new Path(srcPathWithTime));
            FileOutputFormat.setOutputPath(job, new Path(destPathWithTime));

            // ?
            boolean result = false;
            try {
                result = job.waitForCompletion(true);
            } catch (FileAlreadyExistsException e) {
                logger.warn(e.getMessage(), e);
            }
            if (!result) {
                logger.warn("job execute failure!");
                analysisService.saveFailureJob(job.getJobName(), currentTime);
                continue;
                // return -1;
            }

            // ,
            fs.delete(new Path(archPathWithTime), true);
            fs.rename(new Path(srcPathWithTime), new Path(archPathWithTime));
            analysisService.saveSuccessJob(job.getJobName(), currentTime);
        }
    }

    FileSystem.closeAll();
    return 0;
}

From source file:com.paperbook.test.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    //    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    //    if (otherArgs.length != 2) {
    //      System.err.println("Usage: wordcount <in> <out>");
    //      System.exit(2);
    //    }/*from   w ww. j  a va2  s  . c  om*/
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // The path is local path on the local file system not the hdfs
    FileInputFormat.addInputPath(job, new Path("resources/words.txt"));
    FileOutputFormat.setOutputPath(job, new Path("output"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.phantom.hadoop.examples.DBCountPageView.java

License:Apache License

@Override
// Usage DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {

    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;/*from w  w  w .  j  a  v a  2s  . c om*/

    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }

    initialize(driverClassName, url);
    Configuration conf = getConf();

    DBConfiguration.configureDB(conf, driverClassName, url);

    Job job = new Job(conf);

    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);

    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);
    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}

From source file:com.phantom.hadoop.examples.Grep.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }//  ww  w  .ja v  a 2  s. c o  m

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4)
        conf.set(RegexMapper.GROUP, args[3]);

    Job grepJob = new Job(conf);

    try {

        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        grepJob.waitForCompletion(true);

        Job sortJob = new Job(conf);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
                LongWritable.DecreasingComparator.class);

        sortJob.waitForCompletion(true);
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
    return 0;
}