Example usage for org.apache.hadoop.mapred JobConf setMapOutputValueClass

List of usage examples for org.apache.hadoop.mapred JobConf setMapOutputValueClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setMapOutputValueClass.

Prototype

public void setMapOutputValueClass(Class<?> theClass) 

Source Link

Document

Set the value class for the map output data.

Usage

From source file:hibench.PageRankDataGenerator.java

License:Apache License

private void createPageRankNodes() throws IOException {

    LOG.info("Creating PageRank nodes...", null);

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " pagerank nodes";

    job.setJobName(jobname);//from w w  w.j  a va  2 s  .  c om

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.LINKS));
    job.setInputFormat(TextInputFormat.class);

    if (options.PAGERANK_NODE_BALANCE) {
        /***
         * Balance the output order of nodes, to prevent the running
         * of pagerank bench from potential data skew
         */
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setMapperClass(BalancedLinkNodesMapper.class);
        job.setReducerClass(BalancedLinkNodesReducer.class);
        //         job.setPartitionerClass(ModulusPartitioner.class);

        if (options.reds > 0) {
            job.setNumReduceTasks(options.reds);
        } else {
            job.setNumReduceTasks(DataOptions.getMaxNumReduce());
        }
    } else {
        job.setMapOutputKeyClass(Text.class);

        job.setMapperClass(OutputLinkNodesMapper.class);
        job.setNumReduceTasks(0);
    }

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.VERTICALS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Links file " + paths.getPath(DataPaths.LINKS) + " as input");
    LOG.info("Vertices file " + paths.getResult(DataPaths.VERTICALS) + " as output");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.VERTICALS));
}

From source file:hitune.analysis.mapreduce.processor.HadoopMetrics.java

License:Apache License

@Override
public void run() {
    // TODO Auto-generated method stub
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, HadoopMetrics.class);
    try {//  ww w  .  j  a va  2s .  c om
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(HadoopMetrics.MapClass.class);
        conf.setReducerClass(SystemLog.ReduceClass.class);
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputKeyClass(Text.class);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }

    } catch (Exception e) {

        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }

}

From source file:hitune.analysis.mapreduce.processor.HistoryLog.java

License:Apache License

public void run() {
    // TODO Auto-generated method stub
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, HistoryLog.class);
    try {/* ww  w .  j a  v a  2s .  co m*/
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(HistoryLog.MapClass.class);
        conf.setReducerClass(HistoryLog.ReduceClass.class);
        conf.setOutputKeyClass(Text.class);

        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));

            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}

From source file:hitune.analysis.mapreduce.processor.InstrumentDataflow.java

License:Apache License

@Override
public void run() {
    // TODO Auto-generated method stub

    long timestamp = System.currentTimeMillis();

    JobConf conf = new JobConf(this.conf, InstrumentDataflow.class);
    try {//  ww w  . ja  v  a 2s  . c o m
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(InstrumentDataflow.MapClass.class);
        conf.setReducerClass(InstrumentDataflow.ReduceClass.class);
        conf.setOutputKeyClass(Text.class);
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));

            //FileInputFormat.setInputPathFilter(conf, evtFileFilter.class);
            //conf.setNumReduceTasks(1);

            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}

From source file:hitune.analysis.mapreduce.processor.InstrumentSamplingTop.java

License:Apache License

@Override
public void run() {
    // TODO Auto-generated method stub
    long timestamp = System.currentTimeMillis();
    try {/*ww w  .jav  a  2  s  .  co  m*/
        JobConf conf = new JobConf(this.conf, InstrumentSamplingTop.class);
        conf.setJobName(this.getClass().getSimpleName() + "_1_" + timestamp);

        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(InstrumentSamplingTop.MapClass.class);
        conf.setReducerClass(InstrumentSamplingTop.ReduceClass.class);

        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);
        conf.setOutputKeyClass(outputKeyClass);
        conf.setOutputValueClass(outputValueClass);

        conf.setOutputFormat(SequenceFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);

        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(outputPaths + "_1_" + timestamp));

            try {

                //first job
                JobClient.runJob(conf);

                JobConf secondconf = new JobConf(this.conf, InstrumentSamplingTop.class);
                secondconf.setJobName(this.getClass().getSimpleName() + "_2_" + timestamp);
                secondconf.setInputFormat(SequenceFileInputFormat.class);
                secondconf.setMapperClass(IdentityMapper.class);
                secondconf.setReducerClass(InstrumentSamplingTop.TopClass.class);

                secondconf.setMapOutputKeyClass(outputKeyClass);
                secondconf.setMapOutputValueClass(outputValueClass);

                secondconf.setOutputKeyClass(Text.class);
                secondconf.setOutputValueClass(TextArrayWritable.class);
                secondconf.setOutputFormat(CSVFileOutputFormat.class);
                FileInputFormat.setInputPaths(secondconf, outputPaths + "_1_" + timestamp);
                FileOutputFormat.setOutputPath(secondconf, new Path(temp_outputPaths));

                //second job to get ranking list
                JobClient.runJob(secondconf);
                moveResults(secondconf, outputPaths, temp_outputPaths);
                Path temp = new Path(outputPaths + "_1_" + timestamp);
                temp.getFileSystem(conf).delete(temp);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }

}

From source file:hitune.analysis.mapreduce.processor.SystemLog.java

License:Apache License

@Override
public void run() {

    // TODO Auto-generated method stub
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, SystemLog.class);

    try {/*from   ww w. j av a2  s . co  m*/
        conf.setJobName(this.getClass().getSimpleName() + timestamp);

        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(SystemLog.MapClass.class);
        conf.setReducerClass(SystemLog.ReduceClass.class);

        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputKeyClass(Text.class);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");

        }

    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }

}

From source file:io.aos.t4f.hadoop.mapred.WordCountTest.java

License:Apache License

private JobConf createJobConf() {
    JobConf conf = mrCluster.createJobConf();
    conf.setJobName("wordcount test");

    conf.setMapperClass(WordCountMapper.class);
    conf.setReducerClass(WordCountReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setNumMapTasks(1);//from w w  w. j a  v  a 2s . c o  m
    conf.setNumReduceTasks(1);
    FileInputFormat.setInputPaths(conf, input);
    FileOutputFormat.setOutputPath(conf, output);
    return conf;
}

From source file:io.fluo.stress.trie.Unique.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length < 1) {
        log.error("Usage: " + this.getClass().getSimpleName() + "<input dir>{ <input dir>}");
        System.exit(-1);/*from www  .  ja va 2 s  .  c o  m*/
    }

    JobConf job = new JobConf(getConf());

    job.setJobName(Unique.class.getName());
    job.setJarByClass(Unique.class);

    job.setInputFormat(SequenceFileInputFormat.class);
    for (String arg : args) {
        SequenceFileInputFormat.addInputPath(job, new Path(arg));
    }

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setReducerClass(UniqueReducer.class);

    job.setOutputFormat(NullOutputFormat.class);

    RunningJob runningJob = JobClient.runJob(job);
    runningJob.waitForCompletion();
    numUnique = (int) runningJob.getCounters().getCounter(Stats.UNIQUE);

    log.debug("numUnique : " + numUnique);

    return runningJob.isSuccessful() ? 0 : -1;

}

From source file:ivory.core.preprocess.BuildTargetLangWeightedIntDocVectors.java

License:Apache License

@SuppressWarnings("deprecation")
public int runTool() throws Exception {
    //      sLogger.setLevel(Level.DEBUG);

    sLogger.info("PowerTool: GetTargetLangWeightedIntDocVectors");

    JobConf conf = new JobConf(BuildTargetLangWeightedIntDocVectors.class);
    FileSystem fs = FileSystem.get(conf);

    String indexPath = getConf().get("Ivory.IndexPath");

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    String outputPath = env.getWeightedIntDocVectorsDirectory();
    int mapTasks = getConf().getInt("Ivory.NumMapTasks", 0);
    int minSplitSize = getConf().getInt("Ivory.MinSplitSize", 0);
    String collectionName = getConf().get("Ivory.CollectionName");

    sLogger.info("Characteristics of the collection:");
    sLogger.info(" - CollectionName: " + collectionName);
    sLogger.info("Characteristics of the job:");
    sLogger.info(" - NumMapTasks: " + mapTasks);
    sLogger.info(" - MinSplitSize: " + minSplitSize);

    String vocabFile = getConf().get("Ivory.FinalVocab");
    DistributedCache.addCacheFile(new URI(vocabFile), conf);

    Path inputPath = new Path(PwsimEnvironment.getFileNameWithPars(indexPath, "TermDocs"));
    Path weightedVectorsPath = new Path(outputPath);

    if (fs.exists(weightedVectorsPath)) {
        sLogger.info("Output path already exists!");
        return -1;
    }/*from  www  . ja v  a  2  s.c o  m*/
    conf.setJobName("GetWeightedIntDocVectors:" + collectionName);
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(0);
    conf.setInt("mapred.min.split.size", minSplitSize);
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setBoolean("Ivory.Normalize", getConf().getBoolean("Ivory.Normalize", false));
    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, weightedVectorsPath);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(WeightedIntDocVector.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(WeightedIntDocVector.class);

    conf.setMapperClass(MyMapper.class);

    long startTime = System.currentTimeMillis();

    RunningJob rj = JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    Counters counters = rj.getCounters();

    long numOfDocs = (long) counters.findCounter(Docs.Total).getCounter();

    return (int) numOfDocs;
}

From source file:ivory.core.preprocess.BuildWeightedIntDocVectors.java

License:Apache License

@SuppressWarnings("deprecation")
public int runTool() throws Exception {
    sLogger.setLevel(Level.WARN);

    sLogger.info("PowerTool: GetWeightedIntDocVectors");

    // create a new JobConf, inheriting from the configuration of this
    // PowerTool//from  w  ww  .ja v  a 2 s .  c om
    JobConf conf = new JobConf(getConf(), BuildWeightedIntDocVectors.class);
    FileSystem fs = FileSystem.get(conf);

    String indexPath = conf.get("Ivory.IndexPath");
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    String outputPath = env.getWeightedIntDocVectorsDirectory();
    int mapTasks = conf.getInt("Ivory.NumMapTasks", 0);
    int minSplitSize = conf.getInt("Ivory.MinSplitSize", 0);
    String collectionName = conf.get("Ivory.CollectionName");

    sLogger.info("Characteristics of the collection:");
    sLogger.info(" - CollectionName: " + collectionName);
    sLogger.info("Characteristics of the job:");
    sLogger.info(" - NumMapTasks: " + mapTasks);
    sLogger.info(" - MinSplitSize: " + minSplitSize);

    String dfByIntFilePath = env.getDfByIntData();
    String cfByIntFilePath = env.getCfByIntData();

    /* add df table to cache */
    if (!fs.exists(new Path(dfByIntFilePath))) {
        throw new RuntimeException("Error, df data file " + dfByIntFilePath + "doesn't exist!");
    }
    DistributedCache.addCacheFile(new URI(dfByIntFilePath), conf);

    /* add cf table to cache */
    if (!fs.exists(new Path(cfByIntFilePath))) {
        throw new RuntimeException("Error, cf data file " + cfByIntFilePath + "doesn't exist!");
    }
    DistributedCache.addCacheFile(new URI(cfByIntFilePath), conf);

    /* add dl table to cache */
    Path docLengthFile = env.getDoclengthsData();
    if (!fs.exists(docLengthFile)) {
        throw new RuntimeException("Error, doc-length data file " + docLengthFile + "doesn't exist!");
    }
    DistributedCache.addCacheFile(docLengthFile.toUri(), conf);

    Path inputPath = new Path(env.getIntDocVectorsDirectory());
    Path weightedVectorsPath = new Path(outputPath);

    if (fs.exists(weightedVectorsPath)) {
        sLogger.info("Output path already exists!");
        return 0;
    }

    //fs.delete(weightedVectirsPath, true);

    conf.setJobName("GetWeightedIntDocVectors:" + collectionName);
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(0);
    conf.setInt("mapred.min.split.size", minSplitSize);
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, weightedVectorsPath);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(WeightedIntDocVector.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(WeightedIntDocVector.class);

    conf.setMapperClass(MyMapper.class);
    //conf.setInt("mapred.task.timeout",3600000);

    long startTime = System.currentTimeMillis();

    RunningJob job = JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}