Example usage for org.apache.hadoop.mapreduce Job setJobName

List of usage examples for org.apache.hadoop.mapreduce Job setJobName

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setJobName.

Prototype

public void setJobName(String name) throws IllegalStateException 

Source Link

Document

Set the user-specified job name.

Usage

From source file:com.twitter.algebra.matrix.multiply.AtB_DMJ.java

License:Apache License

/**
 * Perform A x B, where At and B refer to the paths that contain matrices in
 * {@link SequenceFileInputFormat}. One of At and B must also conform with
 * {@link MapDir} format. Refer to {@link AtB_DMJ} for further details.
 * /*from w  w  w.java  2s . c  o  m*/
 * @param conf the initial configuration
 * @param mapDirPath path to the matrix in {@link MapDir} format
 * @param matrixInputPaths the list of paths to matrix input partitions over
 *          which we iterate
 * @param matrixOutputPath path to which AxB will be written
 * @param atCols number of columns of At (rows of A)
 * @param bCols
 * @param colsPerPartition cols per partition of the input matrix (whether At or B)
 * @param aIsMapDir is A chosen to be loaded as MapDir
 * @param useInMemCombiner
 * @param numberOfJobs the hint for the desired number of parallel jobs
 * @return the running job
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public Job run(Configuration conf, Path mapDirPath, Path matrixInputPaths, Path matrixOutputPath, int atCols,
        int bCols, int colsPerPartition, boolean aIsMapDir, boolean useInMemCombiner)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    conf.set(MATRIXINMEMORY, mapDirPath.toString());
    conf.setBoolean(AISMAPDIR, aIsMapDir);
    conf.setBoolean(USEINMEMCOMBINER, useInMemCombiner);
    conf.setInt(RESULTROWS, atCols);
    conf.setInt(RESULTCOLS, bCols);
    conf.setInt(PARTITIONCOLS, colsPerPartition);
    FileSystem fs = FileSystem.get(matrixOutputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPaths, "dmj");

    if (useInMemCombiner) {
        Configuration newConf = new Configuration(conf);
        newConf.set("mapreduce.task.io.sort.mb", "1");
        conf = newConf;
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(AtB_DMJ.class);
    job.setJobName(AtB_DMJ.class.getSimpleName());
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    matrixInputPaths = fs.makeQualified(matrixInputPaths);
    MultipleInputs.addInputPath(job, matrixInputPaths, SequenceFileInputFormat.class);

    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    if (!useInMemCombiner)
        job.setCombinerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "dmj");
    job.setNumReduceTasks(numReducers);
    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, atCols);

    job.setReducerClass(EpsilonReducer.class);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    return job;
}

From source file:com.twitter.algebra.matrix.multiply.PartitionerJob.java

License:Apache License

public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int aRows, int partitions)
        throws IOException, InterruptedException, ClassNotFoundException {
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(PartitionerJob.class);
    job.setJobName(PartitionerJob.class.getSimpleName() + "-" + matrixOutputPath.getName());
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);

    job.setNumReduceTasks(partitions);/*from  ww w .  j  a  v a 2  s . com*/

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(IdMapper.class);
    job.setReducerClass(IdReducer.class);

    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, aRows);

    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}

From source file:com.twitter.algebra.matrix.text.Matrix2TextJob.java

License:Apache License

public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(Matrix2TextJob.class);
    job.setJobName(Matrix2TextJob.class.getSimpleName());
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    //    FileInputFormat.addInputPath(job, matrixInputPath);
    MultipleInputs.addInputPath(job, matrixInputPath, SequenceFileInputFormat.class);
    //    job.setInputFormatClass(SequenceFileInputFormat.class);
    TextOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setNumReduceTasks(0);//from w w w.ja v  a 2s. c o  m

    job.setOutputFormatClass(TextOutputFormat.class);
    //    job.setOutputKeyClass(IntWritable.class);
    //    job.setOutputValueClass(org.apache.hadoop.io.Text);
    job.setMapperClass(IdMapper.class);

    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}

From source file:com.twitter.algebra.nmf.ColPartitionJob.java

License:Apache License

/**
 * Partition A on columns, where A refers to the path that contain a matrix in
 * {@link SequenceFileInputFormat}. Refer to {@link ColPartitionJob} for
 * further details.//  ww w. ja v a 2 s  .co m
 * 
 * @param conf the initial configuration
 * @param matrixInputPath the path to the input matrix A
 * @param matrixOutputPath the path of the resulting partitioned matrix
 * @param numInputRows rows
 * @param numInputCols cols
 * @param numColPartitions the hint for the desired number of column
 *          partitions
 * @return the running job
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public Job run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int numInputRows,
        int numInputCols, int numColPartitions)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    FileSystem fs = FileSystem.get(matrixOutputPath.toUri(), conf);
    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "colpartition");

    int colPartSize = getColPartitionSize(numInputCols, numColPartitions);
    numColPartitions = (int) Math.ceil(numInputCols / (double) colPartSize);

    if (numReducers < numColPartitions)
        numReducers = numColPartitions;

    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "colpartition");

    conf.setInt(NUM_ORIG_ROWS_KEY, numInputRows);
    conf.setInt(NUM_ORIG_COLS_KEY, numInputCols);
    conf.setInt(NUM_COL_PARTITIONS, numColPartitions);
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(ColPartitionJob.class);
    job.setJobName(ColPartitionJob.class.getSimpleName());

    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    MultipleInputs.addInputPath(job, matrixInputPath, SequenceFileInputFormat.class);

    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(ElementWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    RowColPartitioner.setPartitioner(job, RowColPartitioner.ElementRowColPartitioner.class, numInputRows,
            numInputCols, numColPartitions);

    job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(numReducers);

    //    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
    return job;
}

From source file:com.twitter.algebra.nmf.CombinerJob.java

License:Apache License

public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int aRows)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    //    conf.setBoolean("mapreduce.output.compress", true);
    //    conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    //    conf.set("mapreduce.output.fileoutputformat.compress.codec", "com.hadoop.compression.lzo.LzoCodec");
    conf.setInt("dfs.replication", 20);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(CombinerJob.class);
    job.setJobName(CombinerJob.class.getSimpleName() + "-" + matrixOutputPath.getName());
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "combiner");
    job.setNumReduceTasks(numReducers);// TODO: make it a parameter

    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(IdMapper.class);
    job.setReducerClass(MergeVectorsReducer.class);

    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, aRows);

    job.submit();//from   ww  w  .ja v  a2  s.com
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}

From source file:com.twitter.algebra.nmf.CompositeDMJ.java

License:Apache License

public Job run(Configuration conf, Path mapDirPath, Path matrixInputPaths, Path matrixOutputPath, int atCols,
        boolean aIsMapDir, String inMemCStr, int inMemCRows, int inMemCCols, float alpha1, float alpha2)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    conf.set(MATRIXINMEMORY, inMemCStr);
    conf.setInt(MATRIXINMEMORYROWS, inMemCRows);
    conf.setInt(MATRIXINMEMORYCOLS, inMemCCols);

    conf.setFloat(ALPHA1, alpha1);/*from   w  w w  .jav  a2  s  .c o  m*/
    conf.setFloat(ALPHA2, alpha2);

    FileSystem fs = FileSystem.get(matrixOutputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPaths, "compositedmj");

    conf.set(MAPDIRMATRIX, mapDirPath.toString());
    conf.setBoolean(AISMAPDIR, aIsMapDir);
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(CompositeDMJ.class);
    job.setJobName(CompositeDMJ.class.getSimpleName() + "-" + matrixOutputPath.getName());
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    matrixInputPaths = fs.makeQualified(matrixInputPaths);
    MultipleInputs.addInputPath(job, matrixInputPaths, SequenceFileInputFormat.class);

    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, atCols);

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    return job;
}

From source file:com.twitter.algebra.nmf.DistRndMatrixJob.java

License:Apache License

public void run(Configuration conf, Path inPath, Path matrixOutputPath, int numInputRows, int numInputCols)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    conf.setInt(ROWS, numInputRows);//w w w  . ja v a 2s  .  c  om
    conf.setInt(COLS, numInputCols);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(DistRndMatrixJob.class);
    job.setJobName(DistRndMatrixJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    FileSystem fs = FileSystem.get(inPath.toUri(), conf);
    inPath = fs.makeQualified(inPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, inPath);
    job.setInputFormatClass(TextInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, numInputRows);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "random");
    job.setNumReduceTasks(numReducers);

    job.setReducerClass(MyReducer.class);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}

From source file:com.twitter.algebra.nmf.Edge2MapDirJob.java

License:Apache License

public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int numInputRows,
        int numInputCols, String name) throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    conf.set(INDEXNAME, name);/*from  w w  w .ja  va  2  s.  c o  m*/
    conf.setInt(ROWS, numInputRows);
    conf.setInt(COLS, numInputCols);
    conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "edge2matrix");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(Edge2MapDirJob.class);
    job.setJobName(Edge2MapDirJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "edge2matrix");
    job.setNumReduceTasks(numReducers);
    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, numInputRows);

    job.setCombinerClass(MergeVectorsCombiner.class);
    job.setReducerClass(MergeVectorsReducer.class);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}

From source file:com.twitter.algebra.nmf.ErrDMJ.java

License:Apache License

public Job run(Configuration conf, Path xPath, Path matrixAInputPath, Path ytPath, Path outPath, int aRows,
        int ytRows, int ytCols) throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    conf.set(MAPDIRMATRIXX, xPath.toString());
    conf.set(MAPDIRMATRIXYT, ytPath.toString());
    conf.setInt(YTROWS, ytRows);// w w  w  . j  av a2 s  .  c o  m
    conf.setInt(YTCOLS, ytCols);
    FileSystem fs = FileSystem.get(outPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixAInputPath, "err");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(ErrDMJ.class);
    job.setJobName(ErrDMJ.class.getSimpleName() + "-" + outPath.getName());

    matrixAInputPath = fs.makeQualified(matrixAInputPath);
    MultipleInputs.addInputPath(job, matrixAInputPath, SequenceFileInputFormat.class);

    outPath = fs.makeQualified(outPath);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    int numReducers = 1;
    job.setNumReduceTasks(numReducers);
    job.setCombinerClass(SumVectorsReducer.class);
    job.setReducerClass(SumVectorsReducer.class);

    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed! ");
    return job;
}

From source file:com.twitter.algebra.nmf.ReindexerJob.java

License:Apache License

public Job run(Configuration conf, Path matrixInputPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(ReindexerJob.class);
    job.setJobName(ReindexerJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setReducerClass(MyReducer.class);
    // this makes the reindexing very slow but is necessary to have total order
    job.setNumReduceTasks(1);/*  w w  w  .ja v a 2 s  . c  om*/

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
    return job;
}