Example usage for org.apache.hadoop.mapred JobConf setReducerClass

List of usage examples for org.apache.hadoop.mapred JobConf setReducerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> theClass) 

Source Link

Document

Set the Reducer class for the job.

Usage

From source file:graphbuilding.GenomixDriver.java

License:Apache License

/**
 * Configures the "Genomix Graph Building" job and runs it through {@code JobClient.runJob}.
 *
 * <p>Whatever currently exists at {@code outputPath} is deleted recursively before the job
 * is submitted.
 *
 * @param inputPath       location of the text input
 * @param outputPath      location the sequence-file output is written to
 * @param numReducers     number of reduce tasks to request
 * @param sizeKmer        value stored in the job configuration under {@code "sizeKmer"}
 * @param defaultConfPath extra configuration resource to load; skipped when {@code null}
 * @throws IOException if the file system or the job run reports an I/O failure
 */
public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
        throws IOException {

    JobConf job = new JobConf(GenomixDriver.class);
    job.setInt("sizeKmer", sizeKmer);
    if (defaultConfPath != null) {
        job.addResource(new Path(defaultConfPath));
    }
    job.setJobName("Genomix Graph Building");

    // Processing stages.
    job.setMapperClass(GenomixMapper.class);
    job.setCombinerClass(GenomixCombiner.class);
    job.setReducerClass(GenomixReducer.class);
    job.setNumReduceTasks(numReducers);

    // Intermediate and final records share the same key/value types.
    job.setMapOutputKeyClass(Kmer.class);
    job.setMapOutputValueClass(KmerCountValue.class);
    job.setOutputKeyClass(Kmer.class);
    job.setOutputValueClass(KmerCountValue.class);

    // Text in, sequence file out.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Clear any previous output so the job can write to the same location.
    FileSystem fileSystem = FileSystem.get(job);
    fileSystem.delete(new Path(outputPath), true);

    JobClient.runJob(job);
}

From source file:hadoopProcesses.testJob.java

/**
 * Runs the "wordcount" job and prints the lines of its first output partition.
 *
 * <p>{@code args[1]} is used as the input path and {@code args[2]} as the output directory.
 * The output directory is deleted recursively before the job is submitted. After the job
 * completes, {@code part-00000} inside that output directory is printed to standard output
 * if it exists.
 *
 * <p>Any exception (including a too-short {@code args} array) is caught and its stack trace
 * printed; nothing is rethrown.
 *
 * @param args command-line style arguments; indices 1 and 2 are read
 */
public static void start(String[] args) {
    try {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        map mapper = new map();
        conf.setMapperClass(mapper.getClass());

        // The same class is registered as both combiner and reducer.
        reducer reduce = new reducer();
        conf.setCombinerClass(reduce.getClass());
        conf.setReducerClass(reduce.getClass());

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        // Resolve the file system from the output path itself and clear old output once.
        Path outputDir = new Path(args[2]);
        FileSystem outputFs = outputDir.getFileSystem(conf);
        outputFs.delete(outputDir, true);

        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

        // Read the results from the directory the job actually wrote to, rather than
        // from a hard-coded "<working dir>/output" location.
        Path src = outputFs.makeQualified(new Path(outputDir, "part-00000"));

        if (outputFs.exists(src)) {
            System.out.println("\t\t------ Results ------ ");

            List<String> lines = (new fileInteractions()).readLines(src, conf);
            for (String line : lines) {
                System.out.println(line);
            }
        }
    } catch (Exception exp) {
        exp.printStackTrace();
    }
}

From source file:hibench.DataGenerator.java

License:Apache License

/**
 * Configures and runs the job that sums up the Zipfian id distribution.
 *
 * <p>The job reads {@code fin} with {@code NLineInputFormat}, writes text output to
 * {@code fout}, and uses a single reduce task.
 *
 * @param fin  input path of the job
 * @param fout output path of the job
 * @param zipf object that is given the job configuration via {@code setJobConf}
 * @throws IOException if checking the output path or running the job fails
 */
public void sumUpZipf(Path fin, Path fout, ZipfRandom zipf) throws IOException {
    LOG.info("Summing up Zipfian Id Distirubtion...");

    final String jobName = fin.getName() + " -> " + fout.getName();

    JobConf conf = new JobConf(WebDataGen.class);
    conf.setJobName(jobName);

    zipf.setJobConf(conf);

    conf.setMapperClass(SumUpZipfMapper.class);
    conf.setReducerClass(SumUpZipfReducer.class);

    // A single reducer is important: the required space is accumulated sequentially.
    conf.setNumReduceTasks(1);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setInputFormat(NLineInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    DataPaths.checkHdfsFile(fout, false);

    FileInputFormat.setInputPaths(conf, fin);
    FileOutputFormat.setOutputPath(conf, fout);

    LOG.info("Running Job: " + jobName);
    LOG.info("Dummy file: " + fin);
    LOG.info("Zipfian sum up file as Ouput: " + fout);
    JobClient.runJob(conf);
    LOG.info("Finished Running Job: " + jobName);
}

From source file:hibench.DataGenerator.java

License:Apache License

/**
 * Configures and runs the job that joins the content file with the Zipfian id file.
 *
 * <p>{@code fids} is mapped by {@code TagRecordsMapper} and {@code fcontent} by
 * {@code ReverseContentMapper}; the joined text output is written to {@code fjoin}.
 *
 * @param fcontent content file containing virtual ids
 * @param fids     Zipfian id distribution file
 * @param fjoin    output path for the joined result
 * @param zipf     supplies the reducer count through its {@code reds} field
 * @throws IOException if running the job fails
 */
public void replaceIds(Path fcontent, Path fids, Path fjoin, ZipfRandom zipf) throws IOException {

    LOG.info("Replace Virtual Zipfian Ids with real Ids...");

    final String jobName = fcontent.getName() + " JOIN " + fids.getName() + " -> " + fjoin.getName();

    JobConf conf = new JobConf(WebDataGen.class);
    conf.setJobName(jobName);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // Each input gets its own mapper.
    MultipleInputs.addInputPath(conf, fids, TextInputFormat.class, TagRecordsMapper.class);
    MultipleInputs.addInputPath(conf, fcontent, TextInputFormat.class, ReverseContentMapper.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // The combiner keeps the reducer from receiving too many inputs.
    conf.setCombinerClass(ConcatTextCombiner.class);
    conf.setReducerClass(JoinContentWithZipfReducer.class);

    // Use the explicitly requested reducer count when positive, else the maximum.
    conf.setNumReduceTasks(zipf.reds > 0 ? zipf.reds : DataOptions.getMaxNumReduce());

    FileOutputFormat.setOutputPath(conf, fjoin);

    LOG.info("Running Job: " + jobName);
    LOG.info("Zipfian Id distribution: " + fids);
    LOG.info("Content file with virtual Ids: " + fcontent);
    LOG.info("Joint result file: " + fjoin);
    JobClient.runJob(conf);
    LOG.info("Finished Running Job: " + jobName);
}

From source file:hibench.HiveDataGenerator.java

License:Apache License

/**
 * Configures and runs the job that creates the rankings table, then cleans temp files.
 *
 * <p>The job has two inputs: the link-page table (mapped by {@code MyIdentityMapper}) and
 * the links file (mapped by {@code TagRecordsMapper}). Output goes to the
 * {@code DataPaths.RANKINGS} result path, as a sequence file or text depending on
 * {@code options.SEQUENCE_OUT}, block-compressed when {@code options.codecClass} is set.
 *
 * @throws IOException if running the job or cleaning temp files fails
 */
private void createRankingsTable() throws IOException {

    LOG.info("Creating table rankings...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " rankings";

    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    job.setCombinerClass(ConcatTextCombiner.class);
    job.setReducerClass(CountRankingAndReplaceIdReducer.class);

    if (options.reds > 0) {
        job.setNumReduceTasks(options.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }

    /*
     * The result needs to be joined with the LINK table so that
     * url ids are replaced with real contents.
     */
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.T_LINK_PAGE), TextInputFormat.class,
            MyIdentityMapper.class);
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class,
            TagRecordsMapper.class);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");

        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.RANKINGS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Table link-page file " + paths.getPath(DataPaths.T_LINK_PAGE) + " as input");
    // The links file is a job input (see addInputPath above), so log the path actually read.
    LOG.info("Links file " + paths.getPath(DataPaths.LINKS) + " as input");
    LOG.info("Ouput file " + paths.getResult(DataPaths.RANKINGS));
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.RANKINGS));
}

From source file:hibench.HiveDataGenerator.java

License:Apache License

/**
 * Configures and runs the job that creates the user-visits table, then cleans temp files.
 *
 * <p>Three files (user agents, country/language codes, search keys) are registered in the
 * distributed cache. The job has two inputs: the dummy file (read with
 * {@code NLineInputFormat}, mapped by {@code CreateRandomAccessMapper}) and the links file
 * (mapped by {@code TagRecordsMapper}). Output goes to the {@code DataPaths.USERVISITS}
 * result path, as a sequence file or text depending on {@code options.SEQUENCE_OUT},
 * block-compressed when {@code options.codecClass} is set.
 *
 * @throws IOException        if running the job or cleaning temp files fails
 * @throws URISyntaxException declared by the original signature; kept for callers
 */
private void createUserVisitsTable() throws IOException, URISyntaxException {

    LOG.info("Creating user visits...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " uservisits";
    job.setJobName(jobname);

    /*
     * Set distributed cache files for table generation.
     * Cache files include:
     * 1. user agents
     * 2. country code and language code
     * 3. search keys
     */
    DistributedCache.addCacheFile(paths.getPath(DataPaths.uagentf).toUri(), job);
    DistributedCache.addCacheFile(paths.getPath(DataPaths.countryf).toUri(), job);
    DistributedCache.addCacheFile(paths.getPath(DataPaths.searchkeyf).toUri(), job);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    visit.setJobConf(job);

    job.setInputFormat(TextInputFormat.class);

    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.DUMMY), NLineInputFormat.class,
            CreateRandomAccessMapper.class);
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class,
            TagRecordsMapper.class);

    job.setCombinerClass(CreateUserVisitsCombiner.class);
    job.setReducerClass(CreateUserVisitsReducer.class);

    if (options.reds > 0) {
        job.setNumReduceTasks(options.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.USERVISITS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file " + paths.getPath(DataPaths.DUMMY) + " as input");
    // The links file is a job input (see addInputPath above), so log the path actually read.
    LOG.info("Links file " + paths.getPath(DataPaths.LINKS) + " as input");
    LOG.info("Ouput file " + paths.getResult(DataPaths.USERVISITS));
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.USERVISITS));
}

From source file:hibench.PageRankDataGenerator.java

License:Apache License

/**
 * Configures and runs the job that creates the PageRank node file, then cleans temp files.
 *
 * <p>The links file is the only input. When {@code options.PAGERANK_NODE_BALANCE} is set the
 * job uses {@code BalancedLinkNodesMapper}/{@code BalancedLinkNodesReducer}; otherwise it is
 * a map-only job using {@code OutputLinkNodesMapper}. Output goes to the
 * {@code DataPaths.VERTICALS} result path, as a sequence file or text depending on
 * {@code options.SEQUENCE_OUT}, block-compressed when {@code options.codecClass} is set.
 *
 * @throws IOException if running the job or cleaning temp files fails
 */
private void createPageRankNodes() throws IOException {

    // Plain message only; the original passed a stray null as a second argument.
    LOG.info("Creating PageRank nodes...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " pagerank nodes";

    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.LINKS));
    job.setInputFormat(TextInputFormat.class);

    if (options.PAGERANK_NODE_BALANCE) {
        /*
         * Balance the output order of nodes, to protect the pagerank
         * bench run from potential data skew.
         */
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setMapperClass(BalancedLinkNodesMapper.class);
        job.setReducerClass(BalancedLinkNodesReducer.class);

        if (options.reds > 0) {
            job.setNumReduceTasks(options.reds);
        } else {
            job.setNumReduceTasks(DataOptions.getMaxNumReduce());
        }
    } else {
        job.setMapOutputKeyClass(Text.class);

        // Map-only: no reduce tasks.
        job.setMapperClass(OutputLinkNodesMapper.class);
        job.setNumReduceTasks(0);
    }

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.VERTICALS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Links file " + paths.getPath(DataPaths.LINKS) + " as input");
    LOG.info("Vertices file " + paths.getResult(DataPaths.VERTICALS) + " as output");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.VERTICALS));
}

From source file:hitune.analysis.mapreduce.processor.HadoopMetrics.java

License:Apache License

/**
 * Configures and runs the analysis job for this processor.
 *
 * <p>The map output key/value classes are loaded by name from the configuration. The job
 * writes to a temporary output directory and, on success, {@code moveResults} is called with
 * the final and temporary paths. If {@code inputfiles} is {@code null} the job is not run.
 *
 * <p>No exception escapes: an {@link IOException} from running the job or moving results
 * sets {@code MOVE_DONE} to {@code false} and is logged; any other failure during setup is
 * logged as a preparation failure.
 */
@Override
public void run() {
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, HadoopMetrics.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(HadoopMetrics.MapClass.class);
        conf.setReducerClass(SystemLog.ReduceClass.class);

        // Intermediate key/value types are configurable and resolved by class name.
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputKeyClass(Text.class);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // Hand the throwable to the logger so the stack trace lands in the log.
                log.warn("For " + getOutputFileName() + " :JOB fails!", e);
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }

    } catch (Exception e) {
        // Hand the throwable to the logger so the stack trace lands in the log.
        log.warn("Job preparation failure!", e);
    }

}

From source file:hitune.analysis.mapreduce.processor.HistoryLog.java

License:Apache License

/**
 * Configures and runs the analysis job for this processor.
 *
 * <p>The map output key/value classes are loaded by name from the configuration. The job
 * writes to a temporary output directory and, on success, {@code moveResults} is called with
 * the final and temporary paths. If {@code inputfiles} is {@code null} the job is not run.
 *
 * <p>No exception escapes: an {@link IOException} from running the job or moving results
 * sets {@code MOVE_DONE} to {@code false} and is logged; any other failure during setup is
 * logged as a preparation failure.
 */
public void run() {
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, HistoryLog.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(HistoryLog.MapClass.class);
        conf.setReducerClass(HistoryLog.ReduceClass.class);
        conf.setOutputKeyClass(Text.class);

        // Intermediate key/value types are configurable and resolved by class name.
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));

            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // Hand the throwable to the logger so the stack trace lands in the log.
                log.warn("For " + getOutputFileName() + " :JOB fails!", e);
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        // Hand the throwable to the logger so the stack trace lands in the log.
        log.warn("Job preparation failure!", e);
    }
}

From source file:hitune.analysis.mapreduce.processor.InstrumentDataflow.java

License:Apache License

/**
 * Configures and runs the analysis job for this processor.
 *
 * <p>The map output key/value classes are loaded by name from the configuration. The job
 * writes to a temporary output directory and, on success, {@code moveResults} is called with
 * the final and temporary paths. If {@code inputfiles} is {@code null} the job is not run.
 *
 * <p>No exception escapes: an {@link IOException} from running the job or moving results
 * sets {@code MOVE_DONE} to {@code false} and is logged; any other failure during setup is
 * logged as a preparation failure.
 */
@Override
public void run() {
    long timestamp = System.currentTimeMillis();

    JobConf conf = new JobConf(this.conf, InstrumentDataflow.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(InstrumentDataflow.MapClass.class);
        conf.setReducerClass(InstrumentDataflow.ReduceClass.class);
        conf.setOutputKeyClass(Text.class);

        // Intermediate key/value types are configurable and resolved by class name.
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));

            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // Hand the throwable to the logger so the stack trace lands in the log.
                log.warn("For " + getOutputFileName() + " :JOB fails!", e);
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        // Hand the throwable to the logger so the stack trace lands in the log.
        log.warn("Job preparation failure!", e);
    }
}