Example usage for org.apache.hadoop.mapred JobConf setOutputFormat

List of usage examples for org.apache.hadoop.mapred JobConf setOutputFormat

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf setOutputFormat.

Prototype

public void setOutputFormat(Class<? extends OutputFormat> theClass) 

Document

Set the OutputFormat implementation for the map-reduce job.
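
To show the call in context, below is a minimal, self-contained driver sketch against the old mapred API. The class name OutputFormatExample, the job name, and the choice of TextOutputFormat are illustrative assumptions, not taken from the examples below; with no mapper or reducer configured, Hadoop falls back to the identity implementations, so the output key/value classes match what TextInputFormat produces.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class OutputFormatExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(OutputFormatExample.class);
        conf.setJobName("output-format-example");

        conf.setInputFormat(TextInputFormat.class);
        // The documented method: selects how the job's output is written
        conf.setOutputFormat(TextOutputFormat.class);

        // TextInputFormat yields LongWritable offsets and Text lines; with the
        // default identity mapper/reducer these are also the job output types
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

As the examples below show, the same call accepts any OutputFormat implementation, for example SequenceFileOutputFormat for binary key/value files, AvroOutputFormat for Avro records, or a custom subclass such as MultipleLangFileOutputFormat.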

Usage

From source file: at.illecker.hadoop.rootbeer.examples.matrixmultiplication.gpu.MatrixMultiplicationGpu.java

License: Apache License

public static Configuration createMatrixMultiplicationGpuConf(Configuration initialConf, Path aPath, Path bPath,
        Path outPath, int outCardinality, int tileWidth, boolean isDebugging) {

    JobConf conf = new JobConf(initialConf, MatrixMultiplicationGpu.class);
    conf.setJobName("MatrixMultiplicationGPU: " + aPath + " x " + bPath + " = " + outPath);

    conf.setInt(CONF_OUT_CARD, outCardinality);
    conf.setInt(CONF_TILE_WIDTH, tileWidth);
    conf.setBoolean(CONF_DEBUG, isDebugging);

    conf.setInputFormat(CompositeInputFormat.class);
    conf.set("mapred.join.expr",
            CompositeInputFormat.compose("inner", SequenceFileInputFormat.class, aPath, bPath));

    conf.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(conf, outPath);

    conf.setMapperClass(MatrixMultiplyGpuMapper.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(VectorWritable.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(VectorWritable.class);

    // Increase child task JVM heap size for GPU Rootbeer execution
    conf.set("mapred.child.java.opts", "-Xms8G -Xmx8G");

    // No reduce step is needed:
    // -> 0 reducers means the reduce step is skipped and the
    //    mapper output is the final output
    // -> an identity reducer would instead keep the shuffle/sort phase
    conf.setNumReduceTasks(0);

    return conf;
}

From source file: average.AverageDriver.java

public static void main(String[] args) {
    JobClient client = new JobClient();
    // The job configuration is set in this variable
    JobConf conf = new JobConf(average.AverageDriver.class);

    // Name of the Job
    conf.setJobName("BookCrossing1.0");

    // Data type of Output Key and Value
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // Setting the Mapper and Reducer Class
    conf.setMapperClass(average.AverageMapper.class);
    conf.setReducerClass(average.AverageReducer.class);

    // Input and output data formats
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // Specify input and output DIRECTORIES (not files)
    FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[2]));

    client.setConf(conf);
    try {
        // Run the job with the configuration set in conf.
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: averageprocessingtimesbytype.AverageProcessingTimesByType.java

public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    JobConf job = new JobConf(conf, AverageProcessingTimesByType.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("AverageProcessingTimesByType");
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    //   job.set("key.value.separator.in.input.line", "");

    JobClient.runJob(job);

    return 0;
}

From source file: avro.HadoopAvro.java

License: Open Source License

private JobConf createJobConfig() throws IOException {
    Path inputPath = new Path(INPUT_PATH);
    Path outputPath = new Path(OUTPUT_PATH);

    FileSystem.get(new Configuration()).delete(outputPath, true);

    JobConf jobConfig = new JobConf();
    jobConfig.setInputFormat(AvroInputFormat.class);
    jobConfig.setOutputFormat(AvroOutputFormat.class);
    AvroOutputFormat.setOutputPath(jobConfig, outputPath);
    AvroInputFormat.addInputPath(jobConfig, inputPath);
    jobConfig.set(AvroJob.OUTPUT_SCHEMA, User.SCHEMA.toString());
    jobConfig.set(AvroJob.INPUT_SCHEMA, User.SCHEMA.toString());
    return jobConfig;
}

From source file: azkaban.jobtype.examples.java.WordCount.java

License: Apache License

public void run() throws Exception {
    logger.info(String.format("Starting %s", getClass().getSimpleName()));

    // hadoop conf should be on the classpath
    JobConf jobconf = getJobConf();
    jobconf.setJarByClass(WordCount.class);

    jobconf.setOutputKeyClass(Text.class);
    jobconf.setOutputValueClass(IntWritable.class);

    jobconf.setMapperClass(Map.class);
    jobconf.setReducerClass(Reduce.class);

    jobconf.setInputFormat(TextInputFormat.class);
    jobconf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.addInputPath(jobconf, new Path(inputPath));
    FileOutputFormat.setOutputPath(jobconf, new Path(outputPath));

    if (forceOutputOverrite) {
        FileSystem fs = FileOutputFormat.getOutputPath(jobconf).getFileSystem(jobconf);
        fs.delete(FileOutputFormat.getOutputPath(jobconf), true);
    }

    super.run();
}

From source file: babel.prep.corpus.CorpusGenerator.java

License: Apache License

/**
 * Configures a map-only dataset generation job.
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDir, boolean xmlOut) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("create " + (xmlOut ? "xml formatted" : "") + " dataset from " + pagesSubDir);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(CorpusGenMapper.class);
    job.setOutputFormat(xmlOut ? MultipleXMLLangFileOutputFormat.class : MultipleLangFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));

    Path outDir = new Path(new Path(crawlDir, CORPUS_SUBDIR),
            "corpus." + (xmlOut ? PARAM_XML + "." : "") + getCurTimeStamp());
    m_fs.delete(outDir, true);

    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    return job;
}

From source file: babel.prep.datedcorpus.DatedCorpusGenerator.java

License: Apache License

/**
 * Configures a dated dataset generation job.
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDir) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("create dated dataset from " + pagesSubDir);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(DatedCorpusGenMapper.class);
    job.setReducerClass(DatedCorpusGenReducer.class);

    job.setMapOutputValueClass(PageVersion.class);
    job.setOutputFormat(DatedLangFilesOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));

    Path outDir = new Path(new Path(crawlDir, CORPUS_SUBDIR), "datedcorpus." + getCurTimeStamp());
    m_fs.delete(outDir, true);

    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    return job;
}

From source file: babel.prep.extract.NutchPageExtractor.java

License: Apache License

/**
 * Configures the extraction job.
 */
protected JobConf createJobConf(String crawlDir) throws IOException {
    Path segmentsPath = new Path(crawlDir, SEGMENTS_SUBDIR);

    List<Path> segPaths = allSegmentDirs(segmentsPath);
    StringBuilder allSegNames = new StringBuilder();

    for (int i = 0; i < segPaths.size(); i++) {
        allSegNames.append(" " + segPaths.get(i).getName());
    }

    String timeStamp = getCurTimeStamp();

    JobConf job = new NutchJob(getConf());
    job.setJobName("read segments" + allSegNames.toString());

    // Specify what info to extract
    job.setBoolean("segment.reader.co", m_co);
    job.setBoolean("segment.reader.fe", m_fe);
    job.setBoolean("segment.reader.ge", m_ge);
    job.setBoolean("segment.reader.pa", m_pa);
    job.setBoolean("segment.reader.pd", m_pd);
    job.setBoolean("segment.reader.pt", m_pt);

    // Specify the paths to extract from for each segment
    for (int i = 0; i < segPaths.size(); i++) {
        if (m_ge)
            FileInputFormat.addInputPath(job, new Path(segPaths.get(i), CrawlDatum.GENERATE_DIR_NAME));
        if (m_fe)
            FileInputFormat.addInputPath(job, new Path(segPaths.get(i), CrawlDatum.FETCH_DIR_NAME));
        if (m_pa)
            FileInputFormat.addInputPath(job, new Path(segPaths.get(i), CrawlDatum.PARSE_DIR_NAME));
        if (m_co)
            FileInputFormat.addInputPath(job, new Path(segPaths.get(i), Content.DIR_NAME));
        if (m_pd)
            FileInputFormat.addInputPath(job, new Path(segPaths.get(i), ParseData.DIR_NAME));
        if (m_pt)
            FileInputFormat.addInputPath(job, new Path(segPaths.get(i), ParseText.DIR_NAME));
    }

    // Specify the segments directory so that mapper can recover segment info
    job.set(JOB_PROP_SEGMENTS_DIR, segmentsPath.getName());
    // Store the start time/date of this job
    job.set(JOB_PROP_JOB_TIMESTAMP, timeStamp);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(PageExtMapper.class);
    job.setReducerClass(PageExtReducer.class);

    job.setMapOutputValueClass(NutchChunk.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    Path outDir = new Path(new Path(crawlDir, PAGES_SUBDIR), "pages.extract." + timeStamp);
    m_fs.delete(outDir, true);

    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    return job;
}

From source file: babel.prep.langid.LangIdentifier.java

License: Apache License

/**
 * Configures a map-only language id job.
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDir, String referrer) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("identify languages for pages in " + pagesSubDir);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(LangIdMapper.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));

    Path outDir = new Path(new Path(crawlDir, PAGES_SUBDIR), "pages.langid." + getCurTimeStamp());
    m_fs.delete(outDir, true);

    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    job.set(JOB_PROP_JOB_REFERRER, referrer);

    return job;
}

From source file: babel.prep.langidtime.LangAndTimeExtractor.java

License: Apache License

/**
 * Configures a language id and time extraction job.
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDir, String referrer) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("identify languages and collect time for pages in " + pagesSubDir);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(LangAndTimeMapper.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    //ANNI EDIT
    job.setNumMapTasks(2);
    job.setNumReduceTasks(2);
    //END ANNI EDIT

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));

    Path outDir = new Path(new Path(crawlDir, PAGES_SUBDIR), "pages.langidtime." + getCurTimeStamp());
    m_fs.delete(outDir, true);

    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    job.set(JOB_PROP_JOB_REFERRER, referrer);

    return job;
}