List of usage examples for org.apache.hadoop.mapreduce.Job#setOutputFormatClass
public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException
From source file:com.tomslabs.grid.avro.AvroWordCount.java
License:Apache License
/**
 * Builds the Avro word-count job, fully configured but not yet submitted.
 *
 * <p>The output schema, maximum split size, reducer count and speculative-execution
 * flag are written into the supplied {@code conf} before the job is created from it.
 *
 * @param conf configuration that receives the job-level settings and seeds the job
 * @param inputPath path added as the job input
 * @param outputPath path set as the job output
 * @return the configured job, ready to be submitted by the caller
 * @throws IOException propagated from job creation or path registration
 */
public static Job createSubmitableJob(final Configuration conf, final Path inputPath, final Path outputPath)
        throws IOException {
    // Configuration-level settings must be in place before the Job is built from conf.
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", true);
    conf.setInt("mapred.reduce.tasks", 10);
    conf.setInt("mapred.max.split.size", 1024000);
    conf.set(AvroFileOutputFormat.OUTPUT_SCHEMA, WordCountSchema.getSchema().toString());

    final Job wordCount = new Job(conf, "Word Count");
    wordCount.setJarByClass(AvroWordCount.class);

    // Input side.
    wordCount.setInputFormatClass(AvroFileInputFormat.class);
    FileInputFormat.addInputPath(wordCount, inputPath);

    // Map side.
    wordCount.setMapperClass(WordCountMapper.class);
    wordCount.setMapOutputKeyClass(Text.class);
    wordCount.setMapOutputValueClass(IntWritable.class);

    // Reduce and output side.
    wordCount.setReducerClass(WordCountReducer.class);
    wordCount.setOutputKeyClass(GenericRecord.class);
    wordCount.setOutputValueClass(NullWritable.class);
    wordCount.setOutputFormatClass(AvroFileOutputFormat.class);
    AvroFileOutputFormat.setDeflateLevel(wordCount, 3);
    FileOutputFormat.setOutputPath(wordCount, outputPath);

    return wordCount;
}
From source file:com.toshiba.mwcloud.gs.hadoop.mapreduce.examples.GSWordCount.java
License:Apache License
/** * <div lang="ja">/* w w w . jav a 2s.c o m*/ * WordCount?MapReduce??? * @param args * @return ???0????????1 * @throws Exception ?????? * </div><div lang="en"> * Run a MapReduce job of WordCount. * @param args command argument * @return 0 for normal termination of the job and 1 otherwise * @throws Exception processing failed. * </div> */ public int run(String[] args) throws Exception { GSConf gsConf = new GSConf(); gsConf.parseArg(args); Configuration conf = getConf(); gsConf.setup(conf); Job job = Job.getInstance(conf, APP_NAME); job.setJarByClass(GSWordCount.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(GSRowWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(GSRowInputFormat.class); job.setOutputFormatClass(GSRowOutputFormat.class); int res = job.waitForCompletion(true) ? 0 : 1; if (res == 0) { printResult(gsConf); } return res; }
From source file:com.twitter.algebra.matrix.format.Sequence2MatrixFormatJob.java
License:Apache License
/**
 * Runs a map-only job (zero reduce tasks) that reads {@code matrixInputPath} with
 * {@link SequenceFileInputFormat} and writes {@code IntWritable}/{@code VectorWritable}
 * pairs to {@code matrixOutputPath} through {@link MatrixOutputFormat}. No mapper class
 * is set on the job. The call blocks until the job finishes.
 *
 * @param conf the initial configuration; also passed to
 *        {@code NMFCommon.setNumberOfMapSlots} before the job is created
 * @param matrixInputPath path of the input matrix
 * @param matrixOutputPath path the output is written to
 * @throws IOException if the job does not complete successfully, or propagated from Hadoop
 * @throws InterruptedException propagated from waiting on the job
 * @throws ClassNotFoundException propagated from job submission
 */
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "seq2mtx");

    // Job.getInstance replaces the deprecated Job(Configuration) constructor.
    Job job = Job.getInstance(conf);
    job.setJarByClass(Sequence2MatrixFormatJob.class);
    job.setJobName(Sequence2MatrixFormatJob.class.getSimpleName());

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    // waitForCompletion submits the job itself when it has not been submitted yet,
    // so no separate submit() call is needed.
    if (!job.waitForCompletion(true)) {
        throw new IOException("Job failed!");
    }
}
From source file:com.twitter.algebra.matrix.multiply.ABInnerHDFSBroadcastOfB.java
License:Apache License
/** * Perform A x B, where A and B refer to the paths that contain matrices in * {@link SequenceFileInputFormat} Refer to {@link ABInnerHDFSBroadcastOfB} * for further details./*w w w .j a v a 2 s. c o m*/ * * @param conf the initial configuration * @param matrixInputPath path to matrix A * @param inMemMatrixDir path to matrix B (must be small enough to fit into * memory) * @param matrixOutputPath path to which AxB will be written * @param inMemMatrixNumRows B rows * @param inMemMatrixNumCols B cols * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void run(Configuration conf, Path matrixInputPath, String inMemMatrixDir, Path matrixOutputPath, int inMemMatrixNumRows, int inMemMatrixNumCols) throws IOException, InterruptedException, ClassNotFoundException { conf = new Configuration(conf); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "axbinner"); conf.set(MATRIXINMEMORY, inMemMatrixDir); conf.setInt(MATRIXINMEMORYROWS, inMemMatrixNumRows); conf.setInt(MATRIXINMEMORYCOLS, inMemMatrixNumCols); @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(ABInnerHDFSBroadcastOfB.class); job.setJobName(ABInnerHDFSBroadcastOfB.class.getSimpleName()); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); job.setMapperClass(MyMapper.class); job.setNumReduceTasks(0); job.setOutputFormatClass(MatrixOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); // since we do not use reducer, to get total order, the map output files has // to be renamed after this function returns: {@link // AlgebraCommon#fixPartitioningProblem} job.submit(); boolean res = job.waitForCompletion(true); 
if (!res) throw new IOException("Job failed!"); }
From source file:com.twitter.algebra.matrix.multiply.ABOuterHDFSBroadcastOfA.java
License:Apache License
/**
 * Perform A x B, where A and B refer to the paths that contain matrices in
 * {@link SequenceFileInputFormat}. Refer to {@link ABOuterHDFSBroadcastOfA}
 * for further details.
 *
 * <p>The in-memory matrix settings are written directly into the supplied
 * {@code conf}. The call blocks until the job finishes.
 *
 * @param conf
 *          the initial configuration
 * @param inMemMatrixDir
 *          path to matrix B (must be small enough to fit into memory)
 * @param matrixInputPath
 *          path to matrix A
 * @param matrixOutputPath
 *          path to which AxB will be written
 * @param inMemMatrixNumRows
 *          B rows
 * @param inMemMatrixNumCols
 *          B cols
 * @throws IOException if the job does not complete successfully, or propagated from Hadoop
 * @throws InterruptedException propagated from waiting on the job
 * @throws ClassNotFoundException propagated from job submission
 */
public void run(Configuration conf, String inMemMatrixDir, Path matrixInputPath, Path matrixOutputPath,
        int inMemMatrixNumRows, int inMemMatrixNumCols)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXINMEMORY, inMemMatrixDir);
    conf.setInt(MATRIXINMEMORYROWS, inMemMatrixNumRows);
    conf.setInt(MATRIXINMEMORYCOLS, inMemMatrixNumCols);

    // Job.getInstance replaces the deprecated Job(Configuration) constructor.
    Job job = Job.getInstance(conf);
    job.setJarByClass(ABOuterHDFSBroadcastOfA.class);
    job.setJobName(ABOuterHDFSBroadcastOfA.class.getSimpleName());

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, inMemMatrixNumRows);

    job.setCombinerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class);
    job.setReducerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    // waitForCompletion submits the job itself when it has not been submitted yet,
    // so no separate submit() call is needed.
    if (!job.waitForCompletion(true)) {
        throw new IOException("Job failed!");
    }
}
From source file:com.twitter.algebra.matrix.multiply.AtBOuterStaticMapsideJoinJob.java
License:Apache License
public void run(Configuration conf, Path atPath, Path bPath, Path outPath, int outCardinality) throws IOException, InterruptedException, ClassNotFoundException { conf.setInt(OUT_CARD, outCardinality); @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJobName(AtBOuterStaticMapsideJoinJob.class.getSimpleName()); job.setJarByClass(AtBOuterStaticMapsideJoinJob.class); FileSystem fs = FileSystem.get(atPath.toUri(), conf); atPath = fs.makeQualified(atPath);/*from www. j ava 2 s .c o m*/ bPath = fs.makeQualified(bPath); job.setInputFormatClass(CompositeInputFormat.class); //mapside join expression job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR, CompositeInputFormat.compose("inner", SequenceFileInputFormat.class, atPath, bPath)); job.setOutputFormatClass(MatrixOutputFormat.class); outPath = fs.makeQualified(outPath); FileOutputFormat.setOutputPath(job, outPath); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); job.setCombinerClass(MyReducer.class); int numReducers = conf.getInt("algebra.reduceslots.multiply", 10); job.setNumReduceTasks(numReducers); job.setReducerClass(MyReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed"); }
From source file:com.twitter.algebra.matrix.multiply.AtB_DMJ.java
License:Apache License
/** * Perform A x B, where At and B refer to the paths that contain matrices in * {@link SequenceFileInputFormat}. One of At and B must also conform with * {@link MapDir} format. Refer to {@link AtB_DMJ} for further details. * //from w w w .j a v a 2 s .c o m * @param conf the initial configuration * @param mapDirPath path to the matrix in {@link MapDir} format * @param matrixInputPaths the list of paths to matrix input partitions over * which we iterate * @param matrixOutputPath path to which AxB will be written * @param atCols number of columns of At (rows of A) * @param bCols * @param colsPerPartition cols per partition of the input matrix (whether At or B) * @param aIsMapDir is A chosen to be loaded as MapDir * @param useInMemCombiner * @param numberOfJobs the hint for the desired number of parallel jobs * @return the running job * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public Job run(Configuration conf, Path mapDirPath, Path matrixInputPaths, Path matrixOutputPath, int atCols, int bCols, int colsPerPartition, boolean aIsMapDir, boolean useInMemCombiner) throws IOException, InterruptedException, ClassNotFoundException { conf = new Configuration(conf); conf.set(MATRIXINMEMORY, mapDirPath.toString()); conf.setBoolean(AISMAPDIR, aIsMapDir); conf.setBoolean(USEINMEMCOMBINER, useInMemCombiner); conf.setInt(RESULTROWS, atCols); conf.setInt(RESULTCOLS, bCols); conf.setInt(PARTITIONCOLS, colsPerPartition); FileSystem fs = FileSystem.get(matrixOutputPath.toUri(), conf); NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPaths, "dmj"); if (useInMemCombiner) { Configuration newConf = new Configuration(conf); newConf.set("mapreduce.task.io.sort.mb", "1"); conf = newConf; } @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(AtB_DMJ.class); job.setJobName(AtB_DMJ.class.getSimpleName()); matrixOutputPath = fs.makeQualified(matrixOutputPath); matrixInputPaths = fs.makeQualified(matrixInputPaths); 
MultipleInputs.addInputPath(job, matrixInputPaths, SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); if (!useInMemCombiner) job.setCombinerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class); int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "dmj"); job.setNumReduceTasks(numReducers); // ensures total order (when used with {@link MatrixOutputFormat}), RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, atCols); job.setReducerClass(EpsilonReducer.class); job.setOutputFormatClass(MatrixOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.submit(); return job; }
From source file:com.twitter.algebra.matrix.multiply.PartitionerJob.java
License:Apache License
/**
 * Runs a job that reads {@code matrixInputPath} with {@link SequenceFileInputFormat},
 * passes it through {@code IdMapper} and {@code IdReducer}, and writes
 * {@code IntWritable}/{@code VectorWritable} pairs to {@code matrixOutputPath} with
 * {@link SequenceFileOutputFormat}. The call blocks until the job finishes.
 *
 * @param conf the initial configuration
 * @param matrixInputPath path of the input matrix
 * @param matrixOutputPath path the output is written to; its name is appended to the job name
 * @param aRows value passed to the row partitioner
 * @param partitions number of reduce tasks
 * @throws IOException if the job does not complete successfully, or propagated from Hadoop
 * @throws InterruptedException propagated from waiting on the job
 * @throws ClassNotFoundException propagated from job submission
 */
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int aRows, int partitions)
        throws IOException, InterruptedException, ClassNotFoundException {
    // Job.getInstance replaces the deprecated Job(Configuration) constructor.
    Job job = Job.getInstance(conf);
    job.setJarByClass(PartitionerJob.class);
    job.setJobName(PartitionerJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);

    job.setNumReduceTasks(partitions);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(IdMapper.class);
    job.setReducerClass(IdReducer.class);
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, aRows);

    // waitForCompletion submits the job itself when it has not been submitted yet,
    // so no separate submit() call is needed.
    if (!job.waitForCompletion(true)) {
        throw new IOException("Job failed!");
    }
}
From source file:com.twitter.algebra.matrix.text.Matrix2TextJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath) throws IOException, InterruptedException, ClassNotFoundException { @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(Matrix2TextJob.class); job.setJobName(Matrix2TextJob.class.getSimpleName()); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); // FileInputFormat.addInputPath(job, matrixInputPath); MultipleInputs.addInputPath(job, matrixInputPath, SequenceFileInputFormat.class); // job.setInputFormatClass(SequenceFileInputFormat.class); TextOutputFormat.setOutputPath(job, matrixOutputPath); job.setNumReduceTasks(0);/*from w ww. j av a2 s. co m*/ job.setOutputFormatClass(TextOutputFormat.class); // job.setOutputKeyClass(IntWritable.class); // job.setOutputValueClass(org.apache.hadoop.io.Text); job.setMapperClass(IdMapper.class); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed!"); }
From source file:com.twitter.algebra.nmf.CombinerJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int aRows) throws IOException, InterruptedException, ClassNotFoundException { conf = new Configuration(conf); // conf.setBoolean("mapreduce.output.compress", true); // conf.setBoolean("mapreduce.output.fileoutputformat.compress", true); // conf.set("mapreduce.output.fileoutputformat.compress.codec", "com.hadoop.compression.lzo.LzoCodec"); conf.setInt("dfs.replication", 20); @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(CombinerJob.class); job.setJobName(CombinerJob.class.getSimpleName() + "-" + matrixOutputPath.getName()); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "combiner"); job.setNumReduceTasks(numReducers);// TODO: make it a parameter job.setOutputFormatClass(MatrixOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.setMapperClass(IdMapper.class); job.setReducerClass(MergeVectorsReducer.class); RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, aRows); job.submit();/*from w w w . ja va 2 s. c om*/ boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed!"); }