Example usage for org.apache.hadoop.mapred JobConf setOutputKeyClass

Introduction

This page lists example usages of org.apache.hadoop.mapred.JobConf#setOutputKeyClass, collected from open-source projects.

Prototype

public void setOutputKeyClass(Class<?> theClass) 

Document

Set the key class for the job output data.
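
Before the full examples, here is a minimal, self-contained sketch of where setOutputKeyClass fits in an old-API (org.apache.hadoop.mapred) job setup. The class name, job name, and argument positions are illustrative only, and mapper/reducer registration is elided; see the complete examples below for full configurations.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class MinimalOutputKeyExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(MinimalOutputKeyExample.class);
        conf.setJobName("minimal-output-key-example");

        // Declare the types of the job's final output records. These must
        // match what the reducer (or the mapper, in a map-only job) emits.
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // Mapper, reducer, and format registration omitted here;
        // the full examples below show complete configurations.

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

Note that unless setMapOutputKeyClass is called explicitly, the class passed to setOutputKeyClass also serves as the map output key class, which is why several of the examples below set both.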

Usage

From source file:hadoopProcesses.testJob.java

public static void start(String[] args) {
    try {

        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(map.class);

        conf.setCombinerClass(reducer.class);
        conf.setReducerClass(reducer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);

        // Delete any previous output so the job can (re)create the directory.
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputDir, true);

        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

        Path src = new Path(fs.getWorkingDirectory() + "/output/part-00000");

        if (fs.exists(src)) {
            System.out.println("\t\t------ Results ------ ");

            List<String> lines = (new fileInteractions()).readLines(src, conf);
            for (String line : lines) {
                System.out.println(line);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:hibench.DataGenerator.java

License:Apache License

public void sumUpZipf(Path fin, Path fout, ZipfRandom zipf) throws IOException {
    LOG.info("Summing up Zipfian Id Distirubtion...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = fin.getName() + " -> " + fout.getName();
    job.setJobName(jobname);

    zipf.setJobConf(job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(SumUpZipfMapper.class);
    job.setReducerClass(SumUpZipfReducer.class);

    job.setNumReduceTasks(1); // Important to sequentially accumulate the required space

    job.setInputFormat(NLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    DataPaths.checkHdfsFile(fout, false);

    FileInputFormat.setInputPaths(job, fin);
    FileOutputFormat.setOutputPath(job, fout);

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file: " + fin);
    LOG.info("Zipfian sum up file as Ouput: " + fout);
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);
}

From source file:hibench.DataGenerator.java

License:Apache License

public void createZipf(Path fin, Path fout, ZipfRandom zipf) throws IOException {
    LOG.info("Creating Zipfian Id Distirubtion...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = fin.getName() + " -> " + fout.getName();

    job.setJobName(jobname);

    zipf.setJobConf(job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CreateZipfDistrMapper.class);

    job.setNumReduceTasks(0);

    job.setInputFormat(NLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, fin);
    FileOutputFormat.setOutputPath(job, fout);

    DataPaths.checkHdfsFile(fout, false);

    LOG.info("Running Job: " + jobname);
    LOG.info("Zipfian Sum File: " + fin);
    LOG.info("Zipfian Id distribution as Ouput: " + fout);
    RunningJob jobCreateZipf = JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    long vElems = jobCreateZipf.getCounters().getCounter(MAP_OUTPUT_RECORDS);
    LOG.info("Created " + vElems + " virtual zipfian elements");
    zipf.setVirtElems(vElems);
}

From source file:hibench.DataGenerator.java

License:Apache License

public void replaceIds(Path fcontent, Path fids, Path fjoin, ZipfRandom zipf) throws IOException {

    LOG.info("Replace Virtual Zipfian Ids with real Ids...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = fcontent.getName() + " JOIN " + fids.getName() + " -> " + fjoin.getName();

    job.setJobName(jobname);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    MultipleInputs.addInputPath(job, fids, TextInputFormat.class, TagRecordsMapper.class);
    MultipleInputs.addInputPath(job, fcontent, TextInputFormat.class, ReverseContentMapper.class);
    job.setOutputFormat(TextOutputFormat.class);

    // use a combiner to cut down the number of inputs reaching the reducer
    job.setCombinerClass(ConcatTextCombiner.class);
    job.setReducerClass(JoinContentWithZipfReducer.class);

    if (zipf.reds > 0) {
        job.setNumReduceTasks(zipf.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }

    FileOutputFormat.setOutputPath(job, fjoin);

    LOG.info("Running Job: " + jobname);
    LOG.info("Zipfian Id distribution: " + fids);
    LOG.info("Content file with virtual Ids: " + fcontent);
    LOG.info("Joint result file: " + fjoin);
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);
}

From source file:hibench.DataGenerator.java

License:Apache License

public void createHtmlPages(Path dummy, HtmlConf html) throws IOException {

    LOG.info("Creating Html Pages...");

    Path fout = new Path(dummy.getParent(), "tmp");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create html pages to " + fout.getName();

    job.setJobName(jobname);

    html.setJobConf(job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CreateHtmlPagesMapper.class);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, dummy);

    // first create result files under tmp folder
    FileOutputFormat.setOutputPath(job, fout);

    // begin from dummy file
    job.setInputFormat(NLineInputFormat.class);

    // use HtmlMultipleTextOutputFormat to produce the three output files
    // defined in PathConf, i.e., LINK, PAGE_ZLINK_TABLE, PAGE_ZWORD_TABLE
    job.setOutputFormat(HtmlMultipleTextOutputFormat.class);

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file: " + dummy);
    LOG.info("Multiple result Html files as <links, words, urls>");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    // Move the result files from tmp into the parent path,
    // then remove the now-empty tmp directory.
    DataPaths.moveFilesToParent(fout);
}

From source file:hibench.HiveDataGenerator.java

License:Apache License

private void createRankingsTable() throws IOException {

    LOG.info("Creating table rankings...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " rankings";

    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    job.setCombinerClass(ConcatTextCombiner.class);
    job.setReducerClass(CountRankingAndReplaceIdReducer.class);

    if (options.reds > 0) {
        job.setNumReduceTasks(options.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }

    /***
     * Join the result with the LINK table so that
     * url ids are replaced with real contents.
     */
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.T_LINK_PAGE), TextInputFormat.class,
            MyIdentityMapper.class);
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class,
            TagRecordsMapper.class);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");

        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.RANKINGS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Table link-page file " + paths.getPath(DataPaths.T_LINK_PAGE) + " as input");
    LOG.info("Links file " + paths.getResult(DataPaths.LINKS) + " as output");
    LOG.info("Ouput file " + paths.getResult(DataPaths.RANKINGS));
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.RANKINGS));
}

From source file:hibench.HiveDataGenerator.java

License:Apache License

private void createUserVisitsTable() throws IOException, URISyntaxException {

    LOG.info("Creating user visits...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " uservisits";
    job.setJobName(jobname);

    /***
     * Set distributed cache files for table generation.
     * The cache files include:
     * 1. user agents
     * 2. country code and language code
     * 3. search keys
     */

    DistributedCache.addCacheFile(paths.getPath(DataPaths.uagentf).toUri(), job);
    DistributedCache.addCacheFile(paths.getPath(DataPaths.countryf).toUri(), job);
    DistributedCache.addCacheFile(paths.getPath(DataPaths.searchkeyf).toUri(), job);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    visit.setJobConf(job);

    job.setInputFormat(TextInputFormat.class);

    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.DUMMY), NLineInputFormat.class,
            CreateRandomAccessMapper.class);
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class,
            TagRecordsMapper.class);

    job.setCombinerClass(CreateUserVisitsCombiner.class);
    job.setReducerClass(CreateUserVisitsReducer.class);

    if (options.reds > 0) {
        job.setNumReduceTasks(options.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.USERVISITS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file " + paths.getPath(DataPaths.DUMMY) + " as input");
    LOG.info("Links file " + paths.getResult(DataPaths.LINKS) + " as output");
    LOG.info("Ouput file " + paths.getResult(DataPaths.USERVISITS));
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.USERVISITS));
}

From source file:hibench.PageRankDataGenerator.java

License:Apache License

private void createPageRankNodes() throws IOException {

    LOG.info("Creating PageRank nodes...", null);

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " pagerank nodes";

    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.LINKS));
    job.setInputFormat(TextInputFormat.class);

    if (options.PAGERANK_NODE_BALANCE) {
        /***
         * Balance the output order of nodes to prevent
         * potential data skew when running the pagerank benchmark.
         */
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setMapperClass(BalancedLinkNodesMapper.class);
        job.setReducerClass(BalancedLinkNodesReducer.class);

        if (options.reds > 0) {
            job.setNumReduceTasks(options.reds);
        } else {
            job.setNumReduceTasks(DataOptions.getMaxNumReduce());
        }
    } else {
        job.setMapOutputKeyClass(Text.class);

        job.setMapperClass(OutputLinkNodesMapper.class);
        job.setNumReduceTasks(0);
    }

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.VERTICALS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Links file " + paths.getPath(DataPaths.LINKS) + " as input");
    LOG.info("Vertices file " + paths.getResult(DataPaths.VERTICALS) + " as output");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.VERTICALS));
}

From source file:hibench.PageRankDataGenerator.java

License:Apache License

/***
 * Create the pagerank edge table, outputting each link A->B as an <A, B> pair.
 * @throws IOException
 */
private void createPageRankLinks() throws IOException {

    LOG.info("Creating PageRank links", null);

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " pagerank links";

    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.T_LINK_PAGE));
    job.setInputFormat(TextInputFormat.class);

    job.setMapperClass(OutputLinkEdgesMapper.class);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.EDGES));

    LOG.info("Running Job: " + jobname);
    LOG.info("Table link-page " + paths.getPath(DataPaths.T_LINK_PAGE) + " as input");
    LOG.info("Edges file " + paths.getResult(DataPaths.EDGES) + " as output");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.EDGES));
}

From source file:hitune.analysis.mapreduce.processor.HadoopMetrics.java

License:Apache License

@Override
public void run() {
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, HadoopMetrics.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(HadoopMetrics.MapClass.class);
        conf.setReducerClass(SystemLog.ReduceClass.class);
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputKeyClass(Text.class);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }

    } catch (Exception e) {

        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }

}