Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

This page collects example usages of org.apache.hadoop.mapreduce Job.setOutputFormatClass, drawn from open-source projects.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException 

Document

Set the OutputFormat for the job.
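
Example

Before the collected usages, here is a minimal, self-contained driver sketch (not taken from any of the sources below) showing where setOutputFormatClass fits into job setup. The class name SetOutputFormatExample is illustrative; Job.getInstance(conf, name) is the non-deprecated replacement for the new Job(conf, name) constructor that the examples below use.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class SetOutputFormatExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setOutputFormatClass example");
        job.setJarByClass(SetOutputFormatExample.class);

        // Read plain text; with no mapper or reducer set, Hadoop applies the
        // identity Mapper and Reducer, so records stay (LongWritable, Text).
        job.setInputFormatClass(TextInputFormat.class);

        // Write the output as a binary SequenceFile instead of plain text.
        // This must be called before the job is submitted; afterwards the
        // call throws IllegalStateException.
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}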

Usage

From source file:DAAL.LinearRegressionNormEq.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job job = new Job(conf, "Linear regression with normal equations method (normEq) Job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LinearRegressionNormEqStep1TrainingMapper.class);
    job.setReducerClass(LinearRegressionNormEqStep2TrainingReducerAndPrediction.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);
    job.setJarByClass(LinearRegressionNormEq.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:DAAL.LinearRegressionQR.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job job = new Job(conf, "Linear regression with normal equations method (normEq) Job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LinearRegressionQRStep1TrainingMapper.class);
    job.setReducerClass(LinearRegressionQRStep2TrainingReducerAndPrediction.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);
    job.setJarByClass(LinearRegressionQR.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:DAAL.LowOrderMomentsDense.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job job = new Job(conf, "Low Order Moments (dense) Job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LowOrderMomentsDenseStep1Mapper.class);
    job.setReducerClass(LowOrderMomentsDenseStep2Reducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);

    job.setJarByClass(LowOrderMomentsDense.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:DAAL.LowOrderMomentsSparse.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job job = new Job(conf, "Low Order Moments (sparse) Job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LowOrderMomentsSparseStep1Mapper.class);
    job.setReducerClass(LowOrderMomentsSparseStep2Reducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);

    job.setJarByClass(LowOrderMomentsSparse.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:DAAL.NaiveBayes.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job job = new Job(conf, "Naive Bayes Job(training)");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(NaiveBayesStep1TrainingMapper.class);
    job.setReducerClass(NaiveBayesStep2TrainingReducerAndPrediction.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);
    job.setJarByClass(NaiveBayes.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:DAAL.PcaCor.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job job = new Job(conf, "PCA Job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(PcaCorStep1Mapper.class);
    job.setReducerClass(PcaCorStep2Reducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);

    job.setJarByClass(PcaCor.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:DAAL.PcaSvd.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job job = new Job(conf, "PCA Job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(PcaSvdStep1Mapper.class);
    job.setReducerClass(PcaSvdStep2Reducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);

    job.setJarByClass(PcaSvd.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:DAAL.QR.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job job = new Job(conf, "QR Job (step1 and step2)");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
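    // Intermediate results from steps 1 and 2 go to a fixed HDFS path as a
    // SequenceFile; job1 (step 3) below reads them back via SequenceFileInputFormat.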
    FileOutputFormat.setOutputPath(job, new Path("/Hadoop/QR/step2"));

    job.setMapperClass(QRStep1Mapper.class);
    job.setReducerClass(QRStep2Reducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);

    job.setJarByClass(QR.class);

    // Abort if the step 1/step 2 job fails rather than running step 3 on missing output.
    if (!job.waitForCompletion(true)) {
        return 1;
    }

    Job job1 = new Job(conf, "QR Job (step3)");

    FileInputFormat.setInputPaths(job1, new Path("/Hadoop/QR/step2"));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));

    job1.setMapperClass(QRStep3Mapper.class);
    job1.setNumReduceTasks(0);

    job1.setInputFormatClass(SequenceFileInputFormat.class);

    job1.setJarByClass(QR.class);

    return job1.waitForCompletion(true) ? 0 : 1;

}

From source file:DAAL.SVD.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job job = new Job(conf, "SVD Job (step1 and step2)");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
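    // Intermediate results from steps 1 and 2 go to a fixed HDFS path as a
    // SequenceFile; job1 (step 3) below reads them back via SequenceFileInputFormat.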
    FileOutputFormat.setOutputPath(job, new Path("/Hadoop/SVD/step2"));

    job.setMapperClass(SVDStep1Mapper.class);
    job.setReducerClass(SVDStep2Reducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);

    job.setJarByClass(SVD.class);

    // Abort if the step 1/step 2 job fails rather than running step 3 on missing output.
    if (!job.waitForCompletion(true)) {
        return 1;
    }

    Job job1 = new Job(conf, "SVD Job (step3)");

    FileInputFormat.setInputPaths(job1, new Path("/Hadoop/SVD/step2"));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));

    job1.setMapperClass(SVDStep3Mapper.class);
    job1.setNumReduceTasks(0);

    job1.setInputFormatClass(SequenceFileInputFormat.class);

    job1.setJarByClass(SVD.class);

    return job1.waitForCompletion(true) ? 0 : 1;

}

From source file:DataCubeRefresh.Grep.java

License: Apache License

/**
 * Runs the grep job followed by the sort job.
 * @param args arguments
 * @return error code
 * @throws Exception if an exception occurs
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inUrl> <outUrl> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Job grepJob = new Job(getConf());
    Job sortJob = new Job(getConf());

    String tempStreamTag = UUID.randomUUID().toString();
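    // The random tag links the two streaming jobs: grepJob publishes its output
    // stream under this tag, and sortJob subscribes to it as input below.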

    try {
        grepJob.setJobName("grep-search");

        TextHStreamingInputFormat.addInputStream(grepJob, 1000, 600, -1, "", false, args[0]);
        HStreamingJobConf.setIsStreamingJob(grepJob, true);
        grepJob.setMapperClass(RegexMapper.class);
        grepJob.getConfiguration().set("mapred.mapper.regex", args[2]);
        if (args.length == 4)
            grepJob.getConfiguration().set("mapred.mapper.regex.group", args[3]);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);
        grepJob.setInputFormatClass(TextHStreamingInputFormat.class);
        grepJob.setOutputFormatClass(TextHStreamingOutputFormat.class);
        HStreamingOutputFormat.setOutputStreamTag(grepJob, tempStreamTag);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);
        grepJob.setJarByClass(this.getClass());

        grepJob.submit();

        sortJob.setJobName("grep-sort");
        sortJob.setInputFormatClass(TextHStreamingInputFormat.class);
        HStreamingJobConf.setIsStreamingJob(sortJob, true);

        // Add the previous job's output stream (partition/reducer 0) as input.
        HStreamingInputFormat.addInputStreamTag(sortJob, tempStreamTag, 0);

        sortJob.setMapperClass(InverseTextMapper.class);
        sortJob.setNumReduceTasks(1); // single output stream
        sortJob.setOutputFormatClass(TextHStreamingOutputFormat.class);
        TextHStreamingOutputFormat.setOutputPath(sortJob, args[1]);
        sortJob.setSortComparatorClass( // sort by decreasing frequency
                LongWritable.DecreasingComparator.class);
        sortJob.setJarByClass(this.getClass());
        sortJob.submit();

        return sortJob.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
        try {
            grepJob.killJob();
        } catch (Exception e1) {
            // ignore
        }
        try {
            sortJob.killJob();
        } catch (Exception e2) {
            // ignore
        }
    }
    return -1; // an exception occurred, so report failure
}