Example usage for org.apache.hadoop.mapred JobConf setNumReduceTasks

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf.setNumReduceTasks, drawn from the open-source projects listed below.

Prototype

public void setNumReduceTasks(int n) 

Document

Set the requisite number of reduce tasks for this job.
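Before the project examples, here is a minimal, self-contained sketch of a classic mapred driver built around this call. It is an illustration under stated assumptions, not code from any project below: the class name, job name, and reducer count are made up, and the Hadoop defaults (TextInputFormat, IdentityMapper, IdentityReducer) are relied on so that no custom classes are needed.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class SetNumReduceTasksExample {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(SetNumReduceTasksExample.class);
        job.setJobName("setNumReduceTasks-example"); // illustrative name

        // With the default TextInputFormat, keys are byte offsets and values
        // are lines; the default identity mapper and reducer pass them
        // through, so the job's output types must match those input types.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // The reducer count is honored exactly: four reducers produce four
        // output files, part-00000 through part-00003. Passing 0 instead
        // would make the job map-only, skipping shuffle, sort, and reduce.
        job.setNumReduceTasks(4);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        JobClient.runJob(job);
    }
}

Unlike setNumMapTasks, which is only a hint (the actual number of map tasks is driven by the input splits), the value passed to setNumReduceTasks is the number of reduce tasks the job will actually run.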

Usage

From source file: nthu.scopelab.tsqr.ssvd.itBtJob.java

License: Apache License

public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p,
        int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis)
        throws Exception {
    boolean outputQ = true;

    String stages[] = reduceSchedule.split(",");

    JobConf job = new JobConf(conf, itBtJob.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setInt(SCHEDULE_NUM, stages.length);
    job.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight);
    job.setInt(QJob.PROP_K, k);
    job.setInt(QJob.PROP_P, p);
    job.setBoolean(QmultiplyJob.OUTPUT_Q, outputQ);
    job.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    job.set(QmultiplyJob.QRF_DIR, qrfPath);
    FileSystem.get(job).delete(btPath, true);

    FileOutputFormat.setOutputPath(job, btPath);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setJobName("itBtJob");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    //job.setOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(BtMapper.class);
    job.setCombinerClass(OuterProductCombiner.class);
    job.setReducerClass(OuterProductReducer.class);

    fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job));
    mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job));
    job.setNumMapTasks(fgather.recNumMapTasks(mis));

    //job.setNumReduceTasks(0);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, inputPath);

    if (outputQ) {
        MultipleOutputs.addNamedOutput(job, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);
    }
    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(job, OUTPUT_BBT, SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
    }
    RunningJob rj = JobClient.runJob(job);
    System.out.println("itBtJob Job ID: " + rj.getJobID().toString());
}
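Here the reducer count is not hard-coded: it arrives as the reduceTasks parameter of run(...), so the caller decides how much parallelism the OuterProductReducer stage gets. The commented-out setNumReduceTasks(0) line would instead have made the job map-only.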

From source file: nthu.scopelab.tsqr.ssvd.itQJob.java

License: Apache License

public static void run(Configuration conf, Path[] inputPaths, String outputPath, String reduceSchedule, int k,
        int p, long seed, int mis) throws ClassNotFoundException, InterruptedException, IOException {

    String stages[] = reduceSchedule.split(",");
    String rinput = "";
    String routput = outputPath + "/iter-r-";

    for (int i = 0; i < stages.length; i++) {
        String thenumber = Integer.toString(i + 1);
        JobConf job = new JobConf(conf, itQJob.class);
        job.setJobName("itQ-job-" + thenumber);
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setOutputFormat(SequenceFileOutputFormat.class);

        if (i == 0)
            job.setMapperClass(QMapper.class);
        else
            job.setMapperClass(IdentityMapper.class);

        job.setReducerClass(QReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(LMatrixWritable.class);

        FileSystem fs = FileSystem.get(job);
        Path Paths[];
        fileGather fgather = null;
        if (i == 0)
            fgather = new fileGather(inputPaths, "part", fs);
        else
            fgather = new fileGather(new Path(rinput), "part", fs);
        Paths = fgather.getPaths();
        mis = Checker.checkMis(mis, fgather.getInputSize(), fs);
        job.setNumMapTasks(fgather.recNumMapTasks(mis));

        job.setNumReduceTasks(Integer.parseInt(stages[i]));

        job.setInt(QRFirstJob.COLUMN_SIZE, k + p);
        job.setLong(PROP_OMEGA_SEED, seed);
        job.setInt(PROP_K, k);
        job.setInt(PROP_P, p);

        fs.delete(new Path(routput + thenumber), true);

        FileInputFormat.setInputPaths(job, Paths);

        FileOutputFormat.setOutputPath(job, new Path(routput + thenumber));

        //FileOutputFormat.setCompressOutput(job, true);
        //FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
        //SequenceFileOutputFormat.setOutputCompressionType(job,CompressionType.BLOCK);
        //output first level Q
        MultipleOutputs.addNamedOutput(job, QF_MAT, SequenceFileOutputFormat.class, IntWritable.class,
                LMatrixWritable.class);

        RunningJob rj = JobClient.runJob(job);
        System.out.println("itQJob Job ID: " + rj.getJobID().toString());
        rinput = routput + thenumber;
    }
}
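In this job the reducer count changes per iteration: each entry of the comma-separated reduceSchedule string becomes the reduce-task count for one itQ-job stage, via Integer.parseInt(stages[i]).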

From source file: nthu.scopelab.tsqr.ssvd.QJob.java

License: Apache License

public static void run(Configuration conf, Path[] inputPaths, String outputPath, String reduceSchedule, int k,
        int p, long seed, int mis) throws ClassNotFoundException, InterruptedException, IOException {

    String stages[] = reduceSchedule.split(",");
    String rinput = "";
    String routput = outputPath + "/iter-r-";

    for (int i = 0; i < stages.length; i++) {
        String thenumber = Integer.toString(i + 1);
        JobConf job = new JobConf(conf, QJob.class);
        job.setJobName("Q-job-" + thenumber);
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setOutputFormat(SequenceFileOutputFormat.class);

        if (i == 0)
            job.setMapperClass(QMapper.class);
        else
            job.setMapperClass(IdentityMapper.class);

        job.setReducerClass(QReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(LMatrixWritable.class);

        FileSystem fs = FileSystem.get(job);
        Path Paths[];
        fileGather fgather = null;
        if (i == 0)
            fgather = new fileGather(inputPaths, "part", fs);
        else
            fgather = new fileGather(new Path(rinput), "part", fs);
        Paths = fgather.getPaths();
        mis = Checker.checkMis(mis, fgather.getInputSize(), fs);
        job.setNumMapTasks(fgather.recNumMapTasks(mis));

        job.setNumReduceTasks(Integer.parseInt(stages[i]));

        job.setInt(QRFirstJob.COLUMN_SIZE, k + p);
        job.setLong(PROP_OMEGA_SEED, seed);
        job.setInt(PROP_K, k);
        job.setInt(PROP_P, p);

        fs.delete(new Path(routput + thenumber), true);

        FileInputFormat.setInputPaths(job, Paths);

        FileOutputFormat.setOutputPath(job, new Path(routput + thenumber));

        //FileOutputFormat.setCompressOutput(job, true);
        //FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
        //SequenceFileOutputFormat.setOutputCompressionType(job,CompressionType.BLOCK);
        //output first level Q
        MultipleOutputs.addNamedOutput(job, QF_MAT, SequenceFileOutputFormat.class, IntWritable.class,
                LMatrixWritable.class);

        RunningJob rj = JobClient.runJob(job);
        System.out.println("QJob Job ID: " + rj.getJobID().toString());
        rinput = routput + thenumber;
    }
}

From source file: nthu.scopelab.tsqr.ssvd.UJob.java

License: Apache License

public void start(Configuration conf, Path inputPathQ, Path inputUHatPath, Path sigmaPath, Path outputPath,
        int k, boolean uHalfSigma, int mis) throws ClassNotFoundException, InterruptedException, IOException {
    String input = "";

    JobConf job = new JobConf(conf, UJob.class);
    jobclient = new JobClient(job);
    job.setJobName("UJob");
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setMapperClass(MultiplyMapper.class);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LMatrixWritable.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(LMatrixWritable.class);

    FileSystem fs = FileSystem.get(job);
    fileGather fgather = new fileGather(
            new Path(inputPathQ.toString().substring(0, inputPathQ.toString().lastIndexOf("/") - 1)), "Q-", fs);
    mis = Checker.checkMis(mis, fgather.getInputSize(), fs);
    job.setNumMapTasks(fgather.recNumMapTasks(mis));

    job.setNumReduceTasks(0);
    job.set("mapreduce.output.basename", OUTPUT_U);
    job.set(PROP_UHAT_PATH, inputUHatPath.toString());
    job.set(PROP_SIGMA_PATH, sigmaPath.toString());
    if (uHalfSigma) {
        job.set(PROP_U_HALFSIGMA, "y");
    }
    job.setInt(QJob.PROP_K, k);
    FileSystem.get(job).delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    FileInputFormat.setInputPaths(job, inputPathQ);
    //JobClient.runJob(job);
    jobid = jobclient.submitJob(job).getID();

}
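UJob is a pure multiplication pass, so it runs map-only: setNumReduceTasks(0) disables the shuffle, sort, and reduce phases entirely, and the MultiplyMapper output is written directly to the output path.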

From source file: org.acacia.csr.java.CSRConverter.java

License: Apache License

public static void main(String[] args) throws Exception {
    if (!validArgs(args)) {
        printUsage();
        return;
    }
    //These are the temp paths that are created on HDFS
    String dir1 = "/user/miyuru/csrconverter-output";
    String dir2 = "/user/miyuru/csrconverter-output-sorted";

    //We first delete the temporary directories if they exist on HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());

    System.out.println("Deleting the dir : " + dir1);

    if (fs1.exists(new Path(dir1))) {
        fs1.delete(new Path(dir1), true);
    }

    System.out.println("Done deleting the dir : " + dir1);
    System.out.println("Deleting the dir : " + dir2);
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    Path notinPath = new Path("/user/miyuru/notinverts/notinverts");

    if (!fs1.exists(notinPath)) {
        fs1.create(notinPath);
    }

    System.out.println("Done deleting the dir : " + dir2);

    //Note (Aug 23, 2014): sometimes the MapReduce job hangs after this point; the cause still needs investigation.

    VertexCounterClient.setDefaultGraphID(args[3], args[2]);

    //First job creates the inverted index

    JobConf conf = new JobConf(CSRConverter.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[1]);
    conf.set("org.acacia.partitioner.hbase.table", args[2]);
    conf.set("org.acacia.partitioner.hbase.contacthost", args[3]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    //conf.setMapperClass(InvertedMapper.class);
    conf.setReducerClass(InvertedReducer.class);
    //conf.setInputFormat(TextInputFormat.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    //FileInputFormat.setInputPaths(conf, new Path(args[0]));
    MultipleInputs.addInputPath(conf, new Path(args[0]), NLinesInputFormat.class, InvertedMapper.class);
    MultipleInputs.addInputPath(conf, new Path("/user/miyuru/notinverts/notinverts"), TextInputFormat.class,
            InvertedMapper.class);
    FileOutputFormat.setOutputPath(conf, new Path(dir1));

    //Also, for the moment, we turn off speculative execution
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    conf.setNumMapTasks(96);
    conf.setNumReduceTasks(96);
    conf.setPartitionerClass(VertexPartitioner.class);
    conf.set("vertex-count", args[4]);
    conf.set("zero-flag", args[5]);
    Job job = new Job(conf, "csr_inverter");
    job.setSortComparatorClass(SortComparator.class);
    job.waitForCompletion(true);
}
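The fixed setNumReduceTasks(96) here works together with the custom VertexPartitioner, which decides which of the 96 reducers each record goes to. Note the asymmetry: the reducer count is honored exactly, while setNumMapTasks(96) is only a hint that the framework may override based on the input splits.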

From source file: org.acacia.partitioner.java.EdgeDistributor.java

License: Apache License

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    String dir1 = "/user/miyuru/input";
    String dir2 = "/user/miyuru/edgedistributed-out";

    //We first delete the temporary directories if they exist on HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    //First job scans through the edge list and splits the edges into separate files based on the partitioned vertex files.

    JobConf conf = new JobConf(EdgeDistributor.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[0]);
    conf.set("org.acacia.partitioner.hbase.table", args[1]);
    conf.set("org.acacia.partitioner.index.contacthost", args[2]);
    conf.set("vert-count", args[3]);
    conf.set("initpartition-id", args[4]);
    conf.set("zero-flag", args[5]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(FileMapper.class);
    conf.setReducerClass(FileReducer.class);
    //conf.setInputFormat(TextInputFormat.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setNumReduceTasks(96); //Need to specify the number of reduce tasks explicitly. Otherwise it creates only one reduce task.

    FileInputFormat.setInputPaths(conf, new Path(dir1));
    FileOutputFormat.setOutputPath(conf, new Path(dir2));

    MultipleOutputs.addMultiNamedOutput(conf, "partition", TextOutputFormat.class, NullWritable.class,
            Text.class);

    Job job = new Job(conf, "EdgeDistributor");
    job.waitForCompletion(true);

    System.out.println("Done job EdgeDistribution");
}
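The inline comment reflects the classic-API default: mapred.reduce.tasks defaults to 1, so without the explicit setNumReduceTasks(96) the entire edge list would funnel through a single reducer.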

From source file: org.apache.avro.mapred.TestAvroMultipleInputs.java

License: Apache License

@Test
public void testJob() throws Exception {
    JobConf job = new JobConf();
    String dir = System.getProperty("test.dir", ".") + "/target/testAvroMultipleInputs";
    Path inputPath1 = new Path(dir + "/in1");
    Path inputPath2 = new Path(dir + "/in2");
    Path outputPath = new Path(dir + "/out");

    outputPath.getFileSystem(job).delete(outputPath, true);
    inputPath1.getFileSystem(job).delete(inputPath1, true);
    inputPath2.getFileSystem(job).delete(inputPath2, true);

    writeNamesFiles(new File(inputPath1.toUri().getPath()));
    writeBalancesFiles(new File(inputPath2.toUri().getPath()));

    job.setJobName("multiple-inputs-join");
    AvroMultipleInputs.addInputPath(job, inputPath1, NamesMapImpl.class,
            ReflectData.get().getSchema(NamesRecord.class));
    AvroMultipleInputs.addInputPath(job, inputPath2, BalancesMapImpl.class,
            ReflectData.get().getSchema(BalancesRecord.class));

    Schema keySchema = ReflectData.get().getSchema(KeyRecord.class);
    Schema valueSchema = ReflectData.get().getSchema(JoinableRecord.class);
    AvroJob.setMapOutputSchema(job, Pair.getPairSchema(keySchema, valueSchema));
    AvroJob.setOutputSchema(job, ReflectData.get().getSchema(CompleteRecord.class));

    AvroJob.setReducerClass(job, ReduceImpl.class);
    job.setNumReduceTasks(1);

    FileOutputFormat.setOutputPath(job, outputPath);

    AvroJob.setReflect(job);

    JobClient.runJob(job);

    validateCompleteFile(new File(new File(dir, "out"), "part-00000.avro"));
}
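Setting exactly one reduce task guarantees a single output file, part-00000.avro, which is what validateCompleteFile reads back at the end of the test.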

From source file: org.apache.avro.mapred.TestAvroMultipleOutputs.java

License: Apache License

@SuppressWarnings("deprecation")
public void testJob_noreducer() throws Exception {
    JobConf job = new JobConf();
    job.setNumReduceTasks(0);
    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path outputPath = new Path(dir + "/out");

    outputPath.getFileSystem(job).delete(outputPath);
    WordCountUtil.writeLinesFile();

    job.setJobName("AvroMultipleOutputs_noreducer");

    AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());

    AvroJob.setMapperClass(job, MapImpl.class);

    FileInputFormat.setInputPaths(job, new Path(dir + "/in"));
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, false);
    AvroMultipleOutputs.addNamedOutput(job, "myavro2", AvroOutputFormat.class,
            Schema.create(Schema.Type.STRING));
    JobClient.runJob(job);
}

From source file: org.apache.avro.mapred.TestGenericJob.java

License: Apache License

@Test
public void testJob() throws Exception {
    JobConf job = new JobConf();
    Path outputPath = new Path(dir + "/out");
    outputPath.getFileSystem(job).delete(outputPath, true);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, dir + "/in");

    job.setMapperClass(AvroTestConverter.class);
    job.setNumReduceTasks(0);

    FileOutputFormat.setOutputPath(job, outputPath);
    System.out.println(createSchema());
    AvroJob.setOutputSchema(job, Pair.getPairSchema(Schema.create(Schema.Type.LONG), createSchema()));
    job.setOutputFormat(AvroOutputFormat.class);

    JobClient.runJob(job);
}

From source file: org.apache.avro.mapred.TestSequenceFileReader.java

License: Apache License

@Test
public void testNonAvroMapOnly() throws Exception {
    JobConf job = new JobConf();
    Path output = new Path(System.getProperty("test.dir", ".") + "/seq-out");

    output.getFileSystem(job).delete(output, true);

    // configure input for non-Avro sequence file
    job.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, FILE.toURI().toString());

    // use a hadoop mapper that emits Avro output
    job.setMapperClass(NonAvroOnlyMapper.class);

    // configure output for avro
    job.setNumReduceTasks(0); // map-only
    FileOutputFormat.setOutputPath(job, output);
    AvroJob.setOutputSchema(job, SCHEMA);

    JobClient.runJob(job);

    checkFile(new DataFileReader<Pair<Long, CharSequence>>(new File(output.toString() + "/part-00000.avro"),
            new SpecificDatumReader<Pair<Long, CharSequence>>()));
}