Example usage for org.apache.hadoop.mapred.lib MultipleOutputs addNamedOutput


Introduction

This page lists example usages of org.apache.hadoop.mapred.lib.MultipleOutputs#addNamedOutput, collected from open source projects.

Prototype

public static void addNamedOutput(JobConf conf, String namedOutput,
        Class<? extends OutputFormat> outputFormatClass, Class<?> keyClass, Class<?> valueClass) 


Document

Adds a named output for the job.
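As a quick orientation before the project examples below, here is a minimal sketch of the typical workflow with the old mapred API: register the named output on the JobConf in the driver, then write to it from a map or reduce task through MultipleOutputs.getCollector, and close the MultipleOutputs instance when the task finishes. The class and output names in this sketch (NamedOutputExample, ExampleReducer, "errors") are illustrative and not taken from the examples below.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleOutputs;

public class NamedOutputExample {

    // Driver side: register a named output called "errors" with its own
    // output format and key/value classes.
    public static void configureJob(JobConf conf) {
        MultipleOutputs.addNamedOutput(conf, "errors", TextOutputFormat.class, Text.class, Text.class);
    }

    // Task side: obtain a collector for the named output and write records to it.
    public static class ExampleReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {

        private MultipleOutputs mos;

        @Override
        public void configure(JobConf conf) {
            mos = new MultipleOutputs(conf);
        }

        @Override
        public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output,
                Reporter reporter) throws IOException {
            while (values.hasNext()) {
                Text value = values.next();
                if (key.toString().startsWith("ERROR")) {
                    mos.getCollector("errors", reporter).collect(key, value); // named output
                } else {
                    output.collect(key, value); // regular job output
                }
            }
        }

        @Override
        public void close() throws IOException {
            mos.close(); // flushes all named outputs
        }
    }
}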

Usage

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

public static void setUpMultipleOutputs(JobConf job, byte[] resultIndexes, byte[] resultDimsUnknwon,
        String[] outputs, OutputInfo[] outputInfos, boolean inBlockRepresentation, boolean mayContainCtable)
        throws Exception {
    if (resultIndexes.length != outputs.length)
        throw new Exception("number of outputs and result indexes does not match");
    if (outputs.length != outputInfos.length)
        throw new Exception("number of outputs and outputInfos indexes does not match");

    job.set(RESULT_INDEXES_CONFIG, MRJobConfiguration.getIndexesString(resultIndexes));
    job.set(RESULT_DIMS_UNKNOWN_CONFIG, MRJobConfiguration.getIndexesString(resultDimsUnknwon));
    job.setStrings(OUTPUT_MATRICES_DIRS_CONFIG, outputs);
    job.setOutputCommitter(MultipleOutputCommitter.class);

    for (int i = 0; i < outputs.length; i++) {
        MapReduceTool.deleteFileIfExistOnHDFS(new Path(outputs[i]), job);
        if (mayContainCtable && resultDimsUnknwon[i] == (byte) 1) {
            setOutputInfo(job, i, outputInfos[i], false);
        } else {
            setOutputInfo(job, i, outputInfos[i], inBlockRepresentation);
        }
        MultipleOutputs.addNamedOutput(job, Integer.toString(i), outputInfos[i].outputFormatClass,
                outputInfos[i].outputKeyClass, outputInfos[i].outputValueClass);
    }
    job.setOutputFormat(NullOutputFormat.class);

    // configure temp output
    Path tempOutputPath = new Path(constructTempOutputFilename());
    FileOutputFormat.setOutputPath(job, tempOutputPath);
    MapReduceTool.deleteFileIfExistOnHDFS(tempOutputPath, job);

}

From source file:com.intel.hadoop.graphbuilder.idnormalize.mapreduce.SortDictMR.java

License:Open Source License

/**
 * @param inputpath
 *          the path to a rawId to newId dictionary.
 * @param outputpath
 *          the path of output directory.
 * @throws IOException
 */
public void run(String inputpath, String outputpath) throws IOException {

    JobConf conf = new JobConf(SortDictMR.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setMapperClass(SortDictMapper.class);
    conf.setReducerClass(SortDictReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setBoolean("hashRawVid", hashRawVid);
    conf.setInt("numChunks", numChunks);
    conf.set("VidParser", vidparser.getClass().getName());

    String outprefix = "vidhashmap";
    for (int i = 0; i < numChunks; i++) {
        MultipleOutputs.addNamedOutput(conf, outprefix + i, TextOutputFormat.class, Text.class, Text.class);
    }

    FileInputFormat.setInputPaths(conf, new Path(inputpath));
    FileOutputFormat.setOutputPath(conf, new Path(outputpath));

    LOG.info("========== Job: Partition the map of rawid -> id ===========");
    LOG.info("Input = " + inputpath);
    LOG.info("Output = " + outputpath);
    LOG.info("======================================================");
    if (hashRawVid)
        LOG.info("Partition on rawId.");
    else
        LOG.info("Partition on newId");
    LOG.debug("numChunks = " + numChunks);
    LOG.debug("VidParser = " + vidparser.getClass().getName());
    JobClient.runJob(conf);
    LOG.info("======================= Done ==========================\n");
}

From source file:gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.java

public static void main(String[] args) {
    //input handling: validate the argument count before touching args
    if (args.length < 2 || args.length > 3) {
        System.err.println(
                "Incorrect input. Example usage: hadoop jar ~/GroupRecs/MultiUser.jar inputPath outputPath [numReduceTasks]");
        return;
    }

    //paths
    Path inputRatingsPath = new Path(args[0]); //movieid, userid, rating (text files)
    Path job1OutputPath = new Path("/user/hduser/partialResults");
    Path partialDistancesPath = new Path("/user/hduser/partialResults/part-*"); //member_nonMember \t partialDistance (sequence files)
    Path candidateMoviesPath = new Path("/user/hduser/partialResults/candidateMovies-*"); //candidateMovieId, nonMemberUserId_rating (text files)
    Path userSimilaritiesPath = new Path("/user/hduser/userSimilarities"); //similarity of each group member to his friends (text files)
    Path finalScoresPath = new Path(args[1]); //movieId \t outputScore

    int numReduceTasks = 56; //default value
    if (args.length == 3) {
        numReduceTasks = Integer.parseInt(args[2]);
    }

    final float friendsSimThresh = 0.8f;

    String groupFilePath = "/user/hduser/group.txt"; //one-line csv file with user ids (text file)

    //JOB 1//
    JobClient client = new JobClient();
    JobConf conf = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class);

    try {
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(job1OutputPath)) {
            fs.delete(job1OutputPath, true);
        }
        if (fs.exists(userSimilaritiesPath)) {
            fs.delete(userSimilaritiesPath, true);
        }
        if (fs.exists(finalScoresPath)) {
            fs.delete(finalScoresPath, true);
        }
    } catch (IOException ex) {
        Logger.getLogger(MultiUserMain.class.getName()).log(Level.SEVERE, null, ex);
    }

    conf.setJobName("Multi-user approach - Job 1");
    System.out.println("Starting Job 1 (Multi-user approach)...");

    conf.setMapOutputKeyClass(VIntWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ByteWritable.class);

    conf.setInputFormat(TextInputFormat.class);
    //conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);

    FileInputFormat.setInputPaths(conf, inputRatingsPath); //user ratings
    FileOutputFormat.setOutputPath(conf, job1OutputPath); //partial distances

    MultipleOutputs.addNamedOutput(conf, "candidateMovies", SequenceFileOutputFormat.class, VIntWritable.class,
            Text.class); //movieId, userId_rating

    conf.setMapperClass(gr.forth.ics.isl.grouprecsmr.job1.Job1Mapper.class);
    conf.setReducerClass(gr.forth.ics.isl.grouprecsmr.job1.Job1Reducer.class);

    conf.setNumReduceTasks(numReduceTasks);

    try {
        DistributedCache.addCacheFile(new URI(groupFilePath), conf); // group   
    } catch (URISyntaxException e1) {
        System.err.println(e1.toString());
    }

    conf.setInt("mapred.task.timeout", 6000000);

    client.setConf(conf);
    RunningJob job;
    try {
        job = JobClient.runJob(conf);
        job.waitForCompletion();
    } catch (Exception e) {
        System.err.println(e);
    }

    //JOB 2//
    System.out.println("Starting Job 2 (Multi-user approach)...");
    JobClient client2 = new JobClient();
    JobConf conf2 = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class);

    conf2.setJobName("Multi-user approach - Job 2");

    conf2.setMapOutputKeyClass(Text.class); //user pair (member_nonMember), where nonMember is in friends
    conf2.setMapOutputValueClass(ByteWritable.class);//similarity part unsquared

    conf2.setOutputKeyClass(Text.class); //user pair (member_nonMember), where nonMember is in friends
    conf2.setOutputValueClass(DoubleWritable.class);//similarity

    conf2.setInputFormat(SequenceFileInputFormat.class);
    //conf2.setInputFormat(TextInputFormat.class);
    conf2.setOutputFormat(TextOutputFormat.class);
    //conf2.setOutputFormat(SequenceFileOutputFormat.class);
    //SequenceFileOutputFormat.setOutputCompressionType(conf2, SequenceFile.CompressionType.BLOCK);

    FileInputFormat.setInputPaths(conf2, partialDistancesPath); //Job 1 output
    FileOutputFormat.setOutputPath(conf2, userSimilaritiesPath); //Job 2 output (similarity of each group member to his friends)

    conf2.setMapperClass(IdentityMapper.class);
    conf2.setReducerClass(gr.forth.ics.isl.grouprecsmr.job2.Job2ReducerMulti.class);

    int numSimilaritiesPartitions = numReduceTasks;
    conf2.setNumReduceTasks(numSimilaritiesPartitions);

    conf2.setFloat("friendsSimThreshold", friendsSimThresh);

    conf2.setInt("mapred.task.timeout", 6000000);
    conf2.set("io.sort.mb", "500");

    client2.setConf(conf2);
    RunningJob job2;
    try {
        job2 = JobClient.runJob(conf2);
        job2.waitForCompletion();
    } catch (Exception e) {
        System.err.println(e);
    }

    //JOB 3//
    System.out.println("Starting Job 3 (Multi-user approach)...");
    JobClient client3 = new JobClient();
    JobConf conf3 = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class);

    conf3.setJobName("Multi-user approach - Job 3");

    conf3.setMapOutputKeyClass(VIntWritable.class);
    conf3.setMapOutputValueClass(Text.class);

    conf3.setOutputKeyClass(VIntWritable.class);
    conf3.setOutputValueClass(DoubleWritable.class);

    conf3.setInputFormat(SequenceFileInputFormat.class);
    //conf3.setInputFormat(TextInputFormat.class);
    conf3.setOutputFormat(TextOutputFormat.class);
    //conf3.setOutputFormat(SequenceFileOutputFormat.class);
    //SequenceFileOutputFormat.setOutputCompressionType(conf3,SequenceFile.CompressionType.BLOCK);

    try {
        DistributedCache.addCacheFile(new URI(groupFilePath), conf3);
    } catch (URISyntaxException ex) {
        System.err.println("Could not add group file to distributed cache. " + ex);
    }
    for (int i = 0; i < numSimilaritiesPartitions; i++) {
        String reduceId = String.format("%05d", i); //5-digit int with leading zeros
        try {
            DistributedCache.addCacheFile(new URI(userSimilaritiesPath.toString() + "/part-" + reduceId),
                    conf3);
        } catch (URISyntaxException ex) {
            System.err.println("Could not add similarities files to distributed cache. " + ex);
        }

    }

    FileInputFormat.setInputPaths(conf3, candidateMoviesPath); //Job 1 output (candidate movies)
    FileOutputFormat.setOutputPath(conf3, finalScoresPath); //Job 3 output (movie \t outputScore)

    //        conf3.setMapperClass(IdentityMapper.class);      
    conf3.setMapperClass(gr.forth.ics.isl.grouprecsmr.job3.Job3MapperMulti.class); //filtering out ratings from non-Friends
    conf3.setReducerClass(gr.forth.ics.isl.grouprecsmr.job3.Job3ReducerMulti.class);

    conf3.setInt("mapred.task.timeout", 6000000);
    conf3.set("io.sort.mb", "500");

    conf3.setNumReduceTasks(numReduceTasks);

    client3.setConf(conf3);
    RunningJob job3;
    try {
        job3 = JobClient.runJob(conf3);
        job3.waitForCompletion();
    } catch (Exception e) {
        System.err.println(e);
    }
}

From source file:nthu.scopelab.tsqr.ssvd.BtJob.java

License:Apache License

public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p,
        int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis)
        throws Exception {
    boolean outputQ = true;

    String stages[] = reduceSchedule.split(",");

    JobConf job = new JobConf(conf, BtJob.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setInt(SCHEDULE_NUM, stages.length);
    job.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight);
    job.setInt(QJob.PROP_K, k);
    job.setInt(QJob.PROP_P, p);
    job.setBoolean(QmultiplyJob.OUTPUT_Q, outputQ);
    job.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    job.set(QmultiplyJob.QRF_DIR, qrfPath);
    FileSystem.get(job).delete(btPath, true);

    FileOutputFormat.setOutputPath(job, btPath);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setJobName("BtJob");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    //job.setOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(BtMapper.class);
    job.setCombinerClass(OuterProductCombiner.class);
    job.setReducerClass(OuterProductReducer.class);

    fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job));
    mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job));
    job.setNumMapTasks(fgather.recNumMapTasks(mis));

    //job.setNumReduceTasks(0);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, inputPath);

    if (outputQ) {
        MultipleOutputs.addNamedOutput(job, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);
    }
    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(job, OUTPUT_BBT, SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
    }
    RunningJob rj = JobClient.runJob(job);
    System.out.println("Btjob Job ID: " + rj.getJobID().toString());
}

From source file:nthu.scopelab.tsqr.ssvd.itBtJob.java

License:Apache License

public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p,
        int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis)
        throws Exception {
    boolean outputQ = true;

    String stages[] = reduceSchedule.split(",");

    JobConf job = new JobConf(conf, itBtJob.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setInt(SCHEDULE_NUM, stages.length);
    job.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight);
    job.setInt(QJob.PROP_K, k);
    job.setInt(QJob.PROP_P, p);
    job.setBoolean(QmultiplyJob.OUTPUT_Q, outputQ);
    job.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    job.set(QmultiplyJob.QRF_DIR, qrfPath);
    FileSystem.get(job).delete(btPath, true);

    FileOutputFormat.setOutputPath(job, btPath);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setJobName("itBtJob");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    //job.setOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(BtMapper.class);
    job.setCombinerClass(OuterProductCombiner.class);
    job.setReducerClass(OuterProductReducer.class);

    fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job));
    mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job));
    job.setNumMapTasks(fgather.recNumMapTasks(mis));

    //job.setNumReduceTasks(0);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, inputPath);

    if (outputQ) {
        MultipleOutputs.addNamedOutput(job, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);
    }
    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(job, OUTPUT_BBT, SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
    }
    RunningJob rj = JobClient.runJob(job);
    System.out.println("itBtJob Job ID: " + rj.getJobID().toString());
}

From source file:nthu.scopelab.tsqr.ssvd.itQJob.java

License:Apache License

public static void run(Configuration conf, Path[] inputPaths, String outputPath, String reduceSchedule, int k,
        int p, long seed, int mis) throws ClassNotFoundException, InterruptedException, IOException {

    String stages[] = reduceSchedule.split(",");
    String rinput = "";
    String routput = outputPath + "/iter-r-";

    for (int i = 0; i < stages.length; i++) {
        String thenumber = Integer.toString(i + 1);
        JobConf job = new JobConf(conf, itQJob.class);
        job.setJobName("itQ-job-" + thenumber);
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setOutputFormat(SequenceFileOutputFormat.class);

        if (i == 0)
            job.setMapperClass(QMapper.class);
        else
            job.setMapperClass(IdentityMapper.class);

        job.setReducerClass(QReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(LMatrixWritable.class);

        FileSystem fs = FileSystem.get(job);
        Path Paths[];
        fileGather fgather = null;
        if (i == 0)
            fgather = new fileGather(inputPaths, "part", fs);
        else
            fgather = new fileGather(new Path(rinput), "part", fs);
        Paths = fgather.getPaths();
        mis = Checker.checkMis(mis, fgather.getInputSize(), fs);
        job.setNumMapTasks(fgather.recNumMapTasks(mis));

        job.setNumReduceTasks(Integer.parseInt(stages[i]));

        job.setInt(QRFirstJob.COLUMN_SIZE, k + p);
        job.setLong(PROP_OMEGA_SEED, seed);
        job.setInt(PROP_K, k);
        job.setInt(PROP_P, p);

        fs.delete(new Path(routput + thenumber), true);

        FileInputFormat.setInputPaths(job, Paths);

        FileOutputFormat.setOutputPath(job, new Path(routput + thenumber));

        //FileOutputFormat.setCompressOutput(job, true);
        //FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
        //SequenceFileOutputFormat.setOutputCompressionType(job,CompressionType.BLOCK);
        //output first level Q
        MultipleOutputs.addNamedOutput(job, QF_MAT, SequenceFileOutputFormat.class, IntWritable.class,
                LMatrixWritable.class);

        RunningJob rj = JobClient.runJob(job);
        System.out.println("itQJob Job ID: " + rj.getJobID().toString());
        rinput = routput + thenumber;
    }
}

From source file:nthu.scopelab.tsqr.ssvd.QJob.java

License:Apache License

public static void run(Configuration conf, Path[] inputPaths, String outputPath, String reduceSchedule, int k,
        int p, long seed, int mis) throws ClassNotFoundException, InterruptedException, IOException {

    String stages[] = reduceSchedule.split(",");
    String rinput = "";
    String routput = outputPath + "/iter-r-";

    for (int i = 0; i < stages.length; i++) {
        String thenumber = Integer.toString(i + 1);
        JobConf job = new JobConf(conf, QJob.class);
        job.setJobName("Q-job-" + thenumber);
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setOutputFormat(SequenceFileOutputFormat.class);

        if (i == 0)
            job.setMapperClass(QMapper.class);
        else
            job.setMapperClass(IdentityMapper.class);

        job.setReducerClass(QReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(LMatrixWritable.class);

        FileSystem fs = FileSystem.get(job);
        Path Paths[];
        fileGather fgather = null;
        if (i == 0)
            fgather = new fileGather(inputPaths, "part", fs);
        else
            fgather = new fileGather(new Path(rinput), "part", fs);
        Paths = fgather.getPaths();
        mis = Checker.checkMis(mis, fgather.getInputSize(), fs);
        job.setNumMapTasks(fgather.recNumMapTasks(mis));

        job.setNumReduceTasks(Integer.parseInt(stages[i]));

        job.setInt(QRFirstJob.COLUMN_SIZE, k + p);
        job.setLong(PROP_OMEGA_SEED, seed);
        job.setInt(PROP_K, k);
        job.setInt(PROP_P, p);

        fs.delete(new Path(routput + thenumber), true);

        FileInputFormat.setInputPaths(job, Paths);

        FileOutputFormat.setOutputPath(job, new Path(routput + thenumber));

        //FileOutputFormat.setCompressOutput(job, true);
        //FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
        //SequenceFileOutputFormat.setOutputCompressionType(job,CompressionType.BLOCK);
        //output first level Q
        MultipleOutputs.addNamedOutput(job, QF_MAT, SequenceFileOutputFormat.class, IntWritable.class,
                LMatrixWritable.class);

        RunningJob rj = JobClient.runJob(job);
        System.out.println("QJob Job ID: " + rj.getJobID().toString());
        rinput = routput + thenumber;
    }
}

From source file:org.apache.mahout.clustering.lda.cvb.CVB0PriorMapper.java

License:Apache License

public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf();
    Job job = new Job(conf);

    MultipleOutputs.addNamedOutput(conf, DOCTOPIC_OUT, SequenceFileOutputFormat.class, IntWritable.class,
            VectorWritable.class);

    Path aPath = null;
    Path bPath = null;
    conf.setInputFormat(CompositeInputFormat.class);
    conf.set("mapred.join.expr",
            CompositeInputFormat.compose("inner", SequenceFileInputFormat.class, aPath, bPath));
}

From source file:org.apache.mahout.math.hadoop.stochasticsvd.BtJob.java

License:Apache License

public static void run(Configuration conf, Path[] inputPathA, Path inputPathQJob, Path xiPath, Path outputPath,
        int minSplitSize, int k, int p, int btBlockHeight, int numReduceTasks, boolean broadcast,
        Class<? extends Writable> labelClass, boolean outputBBtProducts)
        throws ClassNotFoundException, InterruptedException, IOException {

    JobConf oldApiJob = new JobConf(conf);

    MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_Q, org.apache.hadoop.mapred.SequenceFileOutputFormat.class,
            labelClass, VectorWritable.class);

    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_BBT,
                org.apache.hadoop.mapred.SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
        /*
         * MAHOUT-1067: if we are asked to output BBT products then named vector
         * names should be propagated to Q too so that UJob could pick them up
         * from there.
         */
        oldApiJob.setBoolean(PROP_NV, true);
    }
    if (xiPath != null) {
        // compute pca -related stuff as well
        MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_SQ,
                org.apache.hadoop.mapred.SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
        MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_SB,
                org.apache.hadoop.mapred.SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
    }

    /*
     * HACK: we use old api multiple outputs since they are not available in the
     * new api of either 0.20.2 or 0.20.203 but wrap it into a new api job so we
     * can use new api interfaces.
     */

    Job job = new Job(oldApiJob);
    job.setJobName("Bt-job");
    job.setJarByClass(BtJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, inputPathA);
    if (minSplitSize > 0) {
        FileInputFormat.setMinInputSplitSize(job, minSplitSize);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // WARN: tight hadoop integration here:
    job.getConfiguration().set("mapreduce.output.basename", OUTPUT_BT);

    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(SparseRowBlockWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(BtMapper.class);
    job.setCombinerClass(OuterProductCombiner.class);
    job.setReducerClass(OuterProductReducer.class);

    job.getConfiguration().setInt(QJob.PROP_K, k);
    job.getConfiguration().setInt(QJob.PROP_P, p);
    job.getConfiguration().set(PROP_QJOB_PATH, inputPathQJob.toString());
    job.getConfiguration().setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    job.getConfiguration().setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, btBlockHeight);

    job.setNumReduceTasks(numReduceTasks);

    /*
     * PCA-related options, MAHOUT-817
     */
    if (xiPath != null) {
        job.getConfiguration().set(PROP_XI_PATH, xiPath.toString());
    }

    /*
     * we can broadcast the Rhat files since all of them are required by each job,
     * but not the Q files, which correspond to splits of A (each split of A
     * requires only its particular Q file, a different one each time).
     */

    if (broadcast) {
        job.getConfiguration().set(PROP_RHAT_BROADCAST, "y");

        FileSystem fs = FileSystem.get(inputPathQJob.toUri(), conf);
        FileStatus[] fstats = fs.globStatus(new Path(inputPathQJob, QJob.OUTPUT_RHAT + "-*"));
        if (fstats != null) {
            for (FileStatus fstat : fstats) {
                /*
                 * new api is not enabled yet in our dependencies at this time, still
                 * using deprecated one
                 */
                DistributedCache.addCacheFile(fstat.getPath().toUri(), job.getConfiguration());
            }
        }
    }

    job.submit();
    job.waitForCompletion(false);

    if (!job.isSuccessful()) {
        throw new IOException("Bt job unsuccessful.");
    }
}

From source file:org.apache.mahout.math.hadoop.stochasticsvd.QJob.java

License:Apache License

public static void run(Configuration conf, Path[] inputPaths, Path sbPath, Path outputPath, int aBlockRows,
        int minSplitSize, int k, int p, long seed, int numReduceTasks)
        throws ClassNotFoundException, InterruptedException, IOException {

    JobConf oldApiJob = new JobConf(conf);
    MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_QHAT,
            org.apache.hadoop.mapred.SequenceFileOutputFormat.class, SplitPartitionedWritable.class,
            DenseBlockWritable.class);
    MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_RHAT,
            org.apache.hadoop.mapred.SequenceFileOutputFormat.class, SplitPartitionedWritable.class,
            VectorWritable.class);

    Job job = new Job(oldApiJob);
    job.setJobName("Q-job");
    job.setJarByClass(QJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, inputPaths);
    if (minSplitSize > 0) {
        FileInputFormat.setMinInputSplitSize(job, minSplitSize);
    }

    FileOutputFormat.setOutputPath(job, outputPath);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setMapOutputKeyClass(SplitPartitionedWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    job.setOutputKeyClass(SplitPartitionedWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(QMapper.class);

    job.getConfiguration().setInt(PROP_AROWBLOCK_SIZE, aBlockRows);
    job.getConfiguration().setLong(PROP_OMEGA_SEED, seed);
    job.getConfiguration().setInt(PROP_K, k);
    job.getConfiguration().setInt(PROP_P, p);
    if (sbPath != null) {
        job.getConfiguration().set(PROP_SB_PATH, sbPath.toString());
    }

    /*
     * number of reduce tasks doesn't matter. we don't actually send anything to
     * reducers.
     */

    job.setNumReduceTasks(0 /* numReduceTasks */);

    job.submit();
    job.waitForCompletion(false);

    if (!job.isSuccessful()) {
        throw new IOException("Q job unsuccessful.");
    }

}