List of usage examples for org.apache.hadoop.mapreduce Job setOutputFormatClass
public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException
From source file: DAAL.LinearRegressionNormEq.java
License: Open Source License
/**
 * Configures and runs the linear-regression (normal equations) MapReduce job.
 *
 * @param args args[0] = HDFS input path, args[1] = HDFS output path
 * @return 0 on success, 1 on failure
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration configuration = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), configuration);

    Job trainingJob = new Job(configuration, "Linear regression with normal equations method (normEq) Job");
    trainingJob.setJarByClass(LinearRegressionNormEq.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.setInputPaths(trainingJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(trainingJob, new Path(args[1]));

    // Mapper/reducer pair implementing the two training steps.
    trainingJob.setMapperClass(LinearRegressionNormEqStep1TrainingMapper.class);
    trainingJob.setReducerClass(LinearRegressionNormEqStep2TrainingReducerAndPrediction.class);

    // Text in, sequence-file of (IntWritable, WriteableData) pairs out.
    trainingJob.setInputFormatClass(TextInputFormat.class);
    trainingJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    trainingJob.setOutputKeyClass(IntWritable.class);
    trainingJob.setOutputValueClass(WriteableData.class);

    return trainingJob.waitForCompletion(true) ? 0 : 1;
}
From source file: DAAL.LinearRegressionQR.java
License: Open Source License
/**
 * Configures and runs the linear-regression (QR decomposition) MapReduce job.
 *
 * @param args args[0] = HDFS input path, args[1] = HDFS output path
 * @return 0 on success, 1 on failure
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    // FIX: job name previously said "normal equations method (normEq)" —
    // a copy-paste from the normEq driver; this class uses the QR method.
    Job job = new Job(conf, "Linear regression with QR decomposition method (QR) Job");
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Mapper/reducer pair implementing the two training steps.
    job.setMapperClass(LinearRegressionQRStep1TrainingMapper.class);
    job.setReducerClass(LinearRegressionQRStep2TrainingReducerAndPrediction.class);

    // Text in, sequence-file of (IntWritable, WriteableData) pairs out.
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);
    job.setJarByClass(LinearRegressionQR.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: DAAL.LowOrderMomentsDense.java
License: Open Source License
/**
 * Configures and runs the low-order moments (dense) MapReduce job.
 *
 * @param args args[0] = HDFS input path, args[1] = HDFS output path
 * @return 0 on success, 1 on failure
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration configuration = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), configuration);

    Job momentsJob = new Job(configuration, "Low Order Moments (dense) Job");
    momentsJob.setJarByClass(LowOrderMomentsDense.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.setInputPaths(momentsJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(momentsJob, new Path(args[1]));

    // Mapper/reducer pair implementing the two computation steps.
    momentsJob.setMapperClass(LowOrderMomentsDenseStep1Mapper.class);
    momentsJob.setReducerClass(LowOrderMomentsDenseStep2Reducer.class);

    // Text in, sequence-file of (IntWritable, WriteableData) pairs out.
    momentsJob.setInputFormatClass(TextInputFormat.class);
    momentsJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    momentsJob.setOutputKeyClass(IntWritable.class);
    momentsJob.setOutputValueClass(WriteableData.class);

    return momentsJob.waitForCompletion(true) ? 0 : 1;
}
From source file: DAAL.LowOrderMomentsSparse.java
License: Open Source License
/**
 * Configures and runs the low-order moments (sparse) MapReduce job.
 *
 * @param args args[0] = HDFS input path, args[1] = HDFS output path
 * @return 0 on success, 1 on failure
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration configuration = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), configuration);

    Job momentsJob = new Job(configuration, "Low Order Moments (sparse) Job");
    momentsJob.setJarByClass(LowOrderMomentsSparse.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.setInputPaths(momentsJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(momentsJob, new Path(args[1]));

    // Mapper/reducer pair implementing the two computation steps.
    momentsJob.setMapperClass(LowOrderMomentsSparseStep1Mapper.class);
    momentsJob.setReducerClass(LowOrderMomentsSparseStep2Reducer.class);

    // Text in, sequence-file of (IntWritable, WriteableData) pairs out.
    momentsJob.setInputFormatClass(TextInputFormat.class);
    momentsJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    momentsJob.setOutputKeyClass(IntWritable.class);
    momentsJob.setOutputValueClass(WriteableData.class);

    return momentsJob.waitForCompletion(true) ? 0 : 1;
}
From source file: DAAL.NaiveBayes.java
License: Open Source License
/**
 * Configures and runs the Naive Bayes training MapReduce job.
 *
 * @param args args[0] = HDFS input path, args[1] = HDFS output path
 * @return 0 on success, 1 on failure
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration configuration = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), configuration);

    Job trainingJob = new Job(configuration, "Naive Bayes Job(training)");
    trainingJob.setJarByClass(NaiveBayes.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.setInputPaths(trainingJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(trainingJob, new Path(args[1]));

    // Mapper/reducer pair implementing the two training steps.
    trainingJob.setMapperClass(NaiveBayesStep1TrainingMapper.class);
    trainingJob.setReducerClass(NaiveBayesStep2TrainingReducerAndPrediction.class);

    // Text in, sequence-file of (IntWritable, WriteableData) pairs out.
    trainingJob.setInputFormatClass(TextInputFormat.class);
    trainingJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    trainingJob.setOutputKeyClass(IntWritable.class);
    trainingJob.setOutputValueClass(WriteableData.class);

    return trainingJob.waitForCompletion(true) ? 0 : 1;
}
From source file: DAAL.PcaCor.java
License: Open Source License
/**
 * Configures and runs the PCA (correlation method) MapReduce job.
 *
 * @param args args[0] = HDFS input path, args[1] = HDFS output path
 * @return 0 on success, 1 on failure
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration configuration = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), configuration);

    Job pcaJob = new Job(configuration, "PCA Job");
    pcaJob.setJarByClass(PcaCor.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.setInputPaths(pcaJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(pcaJob, new Path(args[1]));

    // Mapper/reducer pair implementing the two computation steps.
    pcaJob.setMapperClass(PcaCorStep1Mapper.class);
    pcaJob.setReducerClass(PcaCorStep2Reducer.class);

    // Text in, sequence-file of (IntWritable, WriteableData) pairs out.
    pcaJob.setInputFormatClass(TextInputFormat.class);
    pcaJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    pcaJob.setOutputKeyClass(IntWritable.class);
    pcaJob.setOutputValueClass(WriteableData.class);

    return pcaJob.waitForCompletion(true) ? 0 : 1;
}
From source file: DAAL.PcaSvd.java
License: Open Source License
/**
 * Configures and runs the PCA (SVD method) MapReduce job.
 *
 * @param args args[0] = HDFS input path, args[1] = HDFS output path
 * @return 0 on success, 1 on failure
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration configuration = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), configuration);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), configuration);

    Job pcaJob = new Job(configuration, "PCA Job");
    pcaJob.setJarByClass(PcaSvd.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.setInputPaths(pcaJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(pcaJob, new Path(args[1]));

    // Mapper/reducer pair implementing the two computation steps.
    pcaJob.setMapperClass(PcaSvdStep1Mapper.class);
    pcaJob.setReducerClass(PcaSvdStep2Reducer.class);

    // Text in, sequence-file of (IntWritable, WriteableData) pairs out.
    pcaJob.setInputFormatClass(TextInputFormat.class);
    pcaJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    pcaJob.setOutputKeyClass(IntWritable.class);
    pcaJob.setOutputValueClass(WriteableData.class);

    return pcaJob.waitForCompletion(true) ? 0 : 1;
}
From source file: DAAL.QR.java
License: Open Source License
/**
 * Runs QR decomposition as a two-job pipeline: steps 1-2 write intermediate
 * results to /Hadoop/QR/step2, then a map-only step 3 produces final output.
 *
 * @param args args[0] = HDFS input path, args[1] = HDFS final output path
 * @return 0 on success, 1 on failure of either job
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job job = new Job(conf, "QR Job (step1 and step2)");
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // Intermediate results land at a fixed HDFS location read by step 3 below.
    FileOutputFormat.setOutputPath(job, new Path("/Hadoop/QR/step2"));
    job.setMapperClass(QRStep1Mapper.class);
    job.setReducerClass(QRStep2Reducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);
    job.setJarByClass(QR.class);

    // FIX: the original ignored this result and ran step 3 even when
    // steps 1-2 failed, operating on missing/partial intermediate data.
    if (!job.waitForCompletion(true)) {
        return 1;
    }

    Job job1 = new Job(conf, "QR Job (step3)");
    FileInputFormat.setInputPaths(job1, new Path("/Hadoop/QR/step2"));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    job1.setMapperClass(QRStep3Mapper.class);
    job1.setNumReduceTasks(0); // map-only finalization step
    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setJarByClass(QR.class);

    return job1.waitForCompletion(true) ? 0 : 1;
}
From source file: DAAL.SVD.java
License: Open Source License
/**
 * Runs SVD as a two-job pipeline: steps 1-2 write intermediate results to
 * /Hadoop/SVD/step2, then a map-only step 3 produces the final output.
 *
 * @param args args[0] = HDFS input path, args[1] = HDFS final output path
 * @return 0 on success, 1 on failure of either job
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job job = new Job(conf, "SVD Job (step1 and step2)");
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // Intermediate results land at a fixed HDFS location read by step 3 below.
    FileOutputFormat.setOutputPath(job, new Path("/Hadoop/SVD/step2"));
    job.setMapperClass(SVDStep1Mapper.class);
    job.setReducerClass(SVDStep2Reducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);
    job.setJarByClass(SVD.class);

    // FIX: the original ignored this result and ran step 3 even when
    // steps 1-2 failed, operating on missing/partial intermediate data.
    if (!job.waitForCompletion(true)) {
        return 1;
    }

    Job job1 = new Job(conf, "SVD Job (step3)");
    FileInputFormat.setInputPaths(job1, new Path("/Hadoop/SVD/step2"));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    job1.setMapperClass(SVDStep3Mapper.class);
    job1.setNumReduceTasks(0); // map-only finalization step
    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setJarByClass(SVD.class);

    return job1.waitForCompletion(true) ? 0 : 1;
}
From source file: DataCubeRefresh.Grep.java
License: Apache License
/** * Run function.// ww w. j a v a 2s . co m * @param args arguments * @return error code * @throws Exception if an exception occurs */ public int run(String[] args) throws Exception { if (args.length < 3) { System.out.println("Grep <inUrl> <outUrl> <regex> [<group>]"); ToolRunner.printGenericCommandUsage(System.out); return -1; } Job grepJob = new Job(getConf()); Job sortJob = new Job(getConf()); String tempStreamTag = UUID.randomUUID().toString(); try { grepJob.setJobName("grep-search"); TextHStreamingInputFormat.addInputStream(grepJob, 1000, 600, -1, "", false, args[0]); HStreamingJobConf.setIsStreamingJob(grepJob, true); grepJob.setMapperClass(RegexMapper.class); grepJob.getConfiguration().set("mapred.mapper.regex", args[2]); if (args.length == 4) grepJob.getConfiguration().set("mapred.mapper.regex.group", args[3]); grepJob.setCombinerClass(LongSumReducer.class); grepJob.setReducerClass(LongSumReducer.class); grepJob.setInputFormatClass(TextHStreamingInputFormat.class); grepJob.setOutputFormatClass(TextHStreamingOutputFormat.class); HStreamingOutputFormat.setOutputStreamTag(grepJob, tempStreamTag); grepJob.setOutputKeyClass(Text.class); grepJob.setOutputValueClass(LongWritable.class); grepJob.setJobName("grep-search"); grepJob.setJarByClass(this.getClass()); grepJob.submit(); sortJob.setJobName("grep-sort"); sortJob.setInputFormatClass(TextHStreamingInputFormat.class); HStreamingJobConf.setIsStreamingJob(sortJob, true); // add previous stream partition/reducer 0 as input. HStreamingInputFormat.addInputStreamTag(sortJob, tempStreamTag, 0); sortJob.setMapperClass(InverseTextMapper.class); sortJob.setNumReduceTasks(1); // single output stream sortJob.setOutputFormatClass(TextHStreamingOutputFormat.class); TextHStreamingOutputFormat.setOutputPath(sortJob, args[1]); sortJob.setSortComparatorClass( // sort by decreasing fre LongWritable.DecreasingComparator.class); sortJob.setJarByClass(this.getClass()); sortJob.submit(); return sortJob.waitForCompletion(true) ? 
0 : 1; } catch (Exception e) { e.printStackTrace(); try { grepJob.killJob(); } catch (Exception e1) { // ignore } try { sortJob.killJob(); } catch (Exception e2) { // ignore } } return 0; }