Example usage for org.apache.hadoop.mapred JobConf setOutputValueClass

Introduction

This page lists example usages of org.apache.hadoop.mapred.JobConf#setOutputValueClass.

Prototype

public void setOutputValueClass(Class<?> theClass) 

Document

Set the value class for job outputs.
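
For orientation before the usage examples, here is a minimal, self-contained sketch (not taken from the sources below) showing where setOutputValueClass sits in a typical old-API (org.apache.hadoop.mapred) job setup. The class name OutputValueClassExample is a placeholder; the identity mapper and reducer simply pass records through.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class OutputValueClassExample {
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(OutputValueClassExample.class);
        conf.setJobName("output-value-class-example");

        // Declare the key/value types of the final job output.
        // Unless setMapOutputKeyClass/setMapOutputValueClass are
        // called as well, these types also apply to the map output.
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        // Identity mapper/reducer pass <Text, Text> records through,
        // so the declared output types match what is actually emitted.
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);

        // KeyValueTextInputFormat yields <Text, Text> pairs by
        // splitting each input line at the first tab character.
        conf.setInputFormat(KeyValueTextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

The value class declared here must match what the reducer (or the mapper, in a map-only job) actually emits. When the intermediate map output type differs from the final output type, declare it separately with setMapOutputValueClass, as several of the examples below do.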

Usage

From source file: graphbuilding.GenomixDriver.java

License: Apache License

public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
        throws IOException {

    JobConf conf = new JobConf(GenomixDriver.class);
    conf.setInt("sizeKmer", sizeKmer);

    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }

    conf.setJobName("Genomix Graph Building");
    conf.setMapperClass(GenomixMapper.class);
    conf.setReducerClass(GenomixReducer.class);
    conf.setCombinerClass(GenomixCombiner.class);

    conf.setMapOutputKeyClass(Kmer.class);
    conf.setMapOutputValueClass(KmerCountValue.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Kmer.class);
    conf.setOutputValueClass(KmerCountValue.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setNumReduceTasks(numReducers);

    FileSystem dfs = FileSystem.get(conf);
    dfs.delete(new Path(outputPath), true);
    JobClient.runJob(conf);
}

From source file: hadoop.UIUCWikifierAppHadoop.java

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobConf job = new JobConf(conf, UIUCWikifierAppHadoop.class);

    //      System.out.println("Run.. Envinronment Variables");
    //      java.util.Map<String,String> env = System.getenv();
    //      System.out.println("Printing environment variables");
    //      for(String k : env.keySet()){
    //         System.out.println(k + "\t" + env.get(k));
    //      }
    //      String jlpValue = System.getProperty("java.library.path");
    //      System.out.println("java.library.path=" + jlpValue);
    //      System.setProperty("java.library.path", jlpValue + ":" + "/home/jgilme1/bin/gurobi550/linux64/lib");

    //process command line options
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    //change current working directory to hdfs path..
    job.setJobName("entitylinker");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(DistributeInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setMapperClass(Map.class);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setNumReduceTasks(0);
    job.setNumMapTasks(Integer.parseInt(args[2]));
    job.set("mapreduce.input.fileinputformat.split.minsize", "0");
    job.set("mapred.child.java.opts", "-Xmx16g");
    job.setNumTasksToExecutePerJvm(-1);
    //job.setMemoryForMapTask(new Long(12288));
    //job.set(JobConf.MAPRED_MAP_TASK_ULIMIT, "12582912");

    String gurobiHomeVariable = "GUROBI_HOME";
    String gurobiHomeValue = "/home/jgilme1/bin/gurobi560/linux64";
    String pathVariable = "PATH";
    String newPathValue = gurobiHomeValue + "/bin";
    String ldLibraryPathVariable = "LD_LIBRARY_PATH";
    String ldLibraryPathValue = gurobiHomeValue + "/lib";
    String grbLicenseFileVariable = "GRB_LICENSE_FILE";
    String grbLicenseFileValue = "/scratch6/usr/jgilme1/gurobiLicense/gurobi.lic";

    StringBuilder newEnvironment = new StringBuilder();
    newEnvironment.append(gurobiHomeVariable);
    newEnvironment.append("=");
    newEnvironment.append(gurobiHomeValue);
    newEnvironment.append(",");
    newEnvironment.append(pathVariable);
    newEnvironment.append("=");
    newEnvironment.append("$" + pathVariable + ":");
    newEnvironment.append(newPathValue);
    newEnvironment.append(",");
    newEnvironment.append(ldLibraryPathVariable);
    newEnvironment.append("=$" + ldLibraryPathVariable + ":");
    newEnvironment.append(ldLibraryPathValue);
    newEnvironment.append(",");
    newEnvironment.append(grbLicenseFileVariable);
    newEnvironment.append("=");
    newEnvironment.append(grbLicenseFileValue);

    //System.out.println(newEnvironment.toString());
    job.set(JobConf.MAPRED_MAP_TASK_ENV, newEnvironment.toString());

    DistributedCache.addCacheArchive(new URI("/user/jgilme1/entitylinking/Wikifier2013.tar.gz"), job);

    JobClient.runJob(job);
    return 0;
}

From source file: hadoopProcesses.testJob.java

public static void start(String[] args) {
    try {

        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(map.class);

        conf.setCombinerClass(reducer.class);
        conf.setReducerClass(reducer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);

        // Delete any previous output so the job can write cleanly.
        outputDir.getFileSystem(conf).delete(outputDir, true);

        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

        FileSystem FS = FileSystem.get(conf);

        Path src = new Path(FS.getWorkingDirectory() + "/output/part-00000");

        if (FS.exists(src)) {
            System.out.println("\t\t------ Results ------ ");
            /*
             BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(src)));
             String line;
             line = br.readLine();
             while (line != null) {
             System.out.println("\t" + line);
             line = br.readLine();
             }
             */

            List<String> FileList = (new fileInteractions()).readLines(src, conf);
            for (String LocString : FileList) {

                System.out.println(LocString);
            }
        }
    } catch (Exception Exp) {
        Exp.printStackTrace();
    }
}

From source file: hibench.DataGenerator.java

License: Apache License

public void sumUpZipf(Path fin, Path fout, ZipfRandom zipf) throws IOException {
    LOG.info("Summing up Zipfian Id Distirubtion...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = fin.getName() + " -> " + fout.getName();
    job.setJobName(jobname);

    zipf.setJobConf(job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(SumUpZipfMapper.class);
    job.setReducerClass(SumUpZipfReducer.class);

    job.setNumReduceTasks(1); // Important to sequentially accumulate the required space

    job.setInputFormat(NLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    DataPaths.checkHdfsFile(fout, false);

    FileInputFormat.setInputPaths(job, fin);
    FileOutputFormat.setOutputPath(job, fout);

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file: " + fin);
    LOG.info("Zipfian sum up file as Ouput: " + fout);
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);
}

From source file: hibench.DataGenerator.java

License: Apache License

public void createZipf(Path fin, Path fout, ZipfRandom zipf) throws IOException {
    LOG.info("Creating Zipfian Id Distirubtion...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = fin.getName() + " -> " + fout.getName();

    job.setJobName(jobname);

    zipf.setJobConf(job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CreateZipfDistrMapper.class);

    job.setNumReduceTasks(0);

    job.setInputFormat(NLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, fin);
    FileOutputFormat.setOutputPath(job, fout);

    DataPaths.checkHdfsFile(fout, false);

    LOG.info("Running Job: " + jobname);
    LOG.info("Zipfian Sum File: " + fin);
    LOG.info("Zipfian Id distribution as Ouput: " + fout);
    RunningJob jobCreateZipf = JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    long vElems = jobCreateZipf.getCounters().getCounter(MAP_OUTPUT_RECORDS);
    LOG.info("Created " + vElems + " virtual zipfian elements");
    zipf.setVirtElems(vElems);
}

From source file: hibench.DataGenerator.java

License: Apache License

public void replaceIds(Path fcontent, Path fids, Path fjoin, ZipfRandom zipf) throws IOException {

    LOG.info("Replace Virtual Zipfian Ids with real Ids...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = fcontent.getName() + " JOIN " + fids.getName() + " -> " + fjoin.getName();

    job.setJobName(jobname);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    MultipleInputs.addInputPath(job, fids, TextInputFormat.class, TagRecordsMapper.class);
    MultipleInputs.addInputPath(job, fcontent, TextInputFormat.class, ReverseContentMapper.class);
    job.setOutputFormat(TextOutputFormat.class);

    // use a combiner to avoid too many inputs for the reducer
    job.setCombinerClass(ConcatTextCombiner.class);
    job.setReducerClass(JoinContentWithZipfReducer.class);

    if (zipf.reds > 0) {
        job.setNumReduceTasks(zipf.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }

    FileOutputFormat.setOutputPath(job, fjoin);

    LOG.info("Running Job: " + jobname);
    LOG.info("Zipfian Id distribution: " + fids);
    LOG.info("Content file with virtual Ids: " + fcontent);
    LOG.info("Joint result file: " + fjoin);
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);
}

From source file: hibench.DataGenerator.java

License: Apache License

public void createHtmlPages(Path dummy, HtmlConf html) throws IOException {

    LOG.info("Creating Html Pages...");

    Path fout = new Path(dummy.getParent(), "tmp");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create html pages to " + fout.getName();

    job.setJobName(jobname);

    html.setJobConf(job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CreateHtmlPagesMapper.class);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, dummy);

    // first create result files under tmp folder
    FileOutputFormat.setOutputPath(job, fout);

    // begin from dummy file
    job.setInputFormat(NLineInputFormat.class);

    // use MultipleTextOutputFormat to produce the three output files defined
    // in PathConf, i.e., LINK, PAGE_ZLINK_TABLE, PAGE_ZWORD_TABLE
    job.setOutputFormat(HtmlMultipleTextOutputFormat.class);

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file: " + dummy);
    LOG.info("Multiple result Html files as <links, words, urls>");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    // Move result files under tmp into the parent path
    // and finally remove the empty tmp path
    DataPaths.moveFilesToParent(fout);
}

From source file: hibench.HiveDataGenerator.java

License: Apache License

private void createRankingsTable() throws IOException {

    LOG.info("Creating table rankings...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " rankings";

    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    job.setCombinerClass(ConcatTextCombiner.class);
    job.setReducerClass(CountRankingAndReplaceIdReducer.class);

    if (options.reds > 0) {
        job.setNumReduceTasks(options.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }

    //      job.setNumReduceTasks(options.agents/2);

    /***
     * need to join the result with the LINK table in order to
     * replace url ids with real contents
     */
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.T_LINK_PAGE), TextInputFormat.class,
            MyIdentityMapper.class);
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class,
            TagRecordsMapper.class);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");

        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.RANKINGS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Table link-page file " + paths.getPath(DataPaths.T_LINK_PAGE) + " as input");
    LOG.info("Links file " + paths.getResult(DataPaths.LINKS) + " as output");
    LOG.info("Ouput file " + paths.getResult(DataPaths.RANKINGS));
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.RANKINGS));
}

From source file: hibench.HiveDataGenerator.java

License: Apache License

private void createUserVisitsTable() throws IOException, URISyntaxException {

    LOG.info("Creating user visits...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " uservisits";
    job.setJobName(jobname);

    /***
     * Set distributed cache files for table generation.
     * The cache files include:
     * 1. user agents
     * 2. country code and language code
     * 3. search keys
     */

    DistributedCache.addCacheFile(paths.getPath(DataPaths.uagentf).toUri(), job);
    DistributedCache.addCacheFile(paths.getPath(DataPaths.countryf).toUri(), job);
    DistributedCache.addCacheFile(paths.getPath(DataPaths.searchkeyf).toUri(), job);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    visit.setJobConf(job);

    job.setInputFormat(TextInputFormat.class);

    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.DUMMY), NLineInputFormat.class,
            CreateRandomAccessMapper.class);
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class,
            TagRecordsMapper.class);

    job.setCombinerClass(CreateUserVisitsCombiner.class);
    job.setReducerClass(CreateUserVisitsReducer.class);

    if (options.reds > 0) {
        job.setNumReduceTasks(options.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }

    //      job.setNumReduceTasks(options.agents/2);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.USERVISITS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file " + paths.getPath(DataPaths.DUMMY) + " as input");
    LOG.info("Links file " + paths.getResult(DataPaths.LINKS) + " as output");
    LOG.info("Ouput file " + paths.getResult(DataPaths.USERVISITS));
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.USERVISITS));
}

From source file: hibench.PageRankDataGenerator.java

License: Apache License

private void createPageRankNodes() throws IOException {

    LOG.info("Creating PageRank nodes...", null);

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " pagerank nodes";

    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.LINKS));
    job.setInputFormat(TextInputFormat.class);

    if (options.PAGERANK_NODE_BALANCE) {
        /***
         * Balance the output order of nodes to keep the pagerank
         * benchmark from suffering potential data skew
         */
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setMapperClass(BalancedLinkNodesMapper.class);
        job.setReducerClass(BalancedLinkNodesReducer.class);
        //         job.setPartitionerClass(ModulusPartitioner.class);

        if (options.reds > 0) {
            job.setNumReduceTasks(options.reds);
        } else {
            job.setNumReduceTasks(DataOptions.getMaxNumReduce());
        }
    } else {
        job.setMapOutputKeyClass(Text.class);

        job.setMapperClass(OutputLinkNodesMapper.class);
        job.setNumReduceTasks(0);
    }

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.VERTICALS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Links file " + paths.getPath(DataPaths.LINKS) + " as input");
    LOG.info("Vertices file " + paths.getResult(DataPaths.VERTICALS) + " as output");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.VERTICALS));
}