Example usage for org.apache.hadoop.mapred JobConf setOutputFormat

List of usage examples for org.apache.hadoop.mapred JobConf setOutputFormat

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setOutputFormat.

Prototype

public void setOutputFormat(Class<? extends OutputFormat> theClass) 

Source Link

Document

Set the OutputFormat implementation for the map-reduce job.

Usage

From source file:nthu.scopelab.tsqr.ssvd.ABtDenseOutJob.java

License:Apache License

/**
 * Configures and runs the map-only "ABtDenseOutJob" through the classic
 * {@code org.apache.hadoop.mapred} API.
 *
 * <p>Input and output are both sequence files; map output and job output use
 * {@code IntWritable} keys and {@code LMatrixWritable} values.
 *
 * @param conf        base configuration the job configuration is derived from
 * @param inputPath   input paths handed to {@code FileInputFormat}
 * @param inputBt     path whose string form is stored under {@code PROP_BT_PATH}
 * @param outputPath  job output directory
 * @param k           value stored under {@code QJob.PROP_K}
 * @param p           value stored under {@code QJob.PROP_P}
 * @param reduceTasks not used by this method; the job is always run with zero reducers
 * @param mis         value passed through {@code Checker.checkMis} before it is used to
 *                    derive the number of map tasks
 * @throws Exception if job setup or execution fails
 */
public static void run(Configuration conf, Path[] inputPath, Path inputBt, Path outputPath, int k, int p,
        int reduceTasks, int mis) throws Exception {
    final JobConf jobConf = new JobConf(conf, ABtDenseOutJob.class);
    jobConf.setJobName("ABtDenseOutJob");

    // Sequence files on both ends of the job.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(jobConf, inputPath);
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    // Job parameters published through the configuration.
    jobConf.setInt(QJob.PROP_K, k);
    jobConf.setInt(QJob.PROP_P, p);
    jobConf.set(PROP_BT_PATH, inputBt.toString());

    // Map-only job: a mapper, no reducers, identical map-output and job-output types.
    jobConf.setMapperClass(ABtMapper.class);
    jobConf.setNumReduceTasks(0);
    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(LMatrixWritable.class);
    jobConf.setOutputKeyClass(IntWritable.class);
    jobConf.setOutputValueClass(LMatrixWritable.class);

    // The number of map tasks is recommended by fileGather from the checked "mis" value.
    final FileSystem fs = FileSystem.get(jobConf);
    final fileGather inputs = new fileGather(inputPath, "", fs);
    final int checkedMis = Checker.checkMis(mis, inputs.getInputSize(), fs);
    jobConf.setNumMapTasks(inputs.recNumMapTasks(checkedMis));

    JobClient.runJob(jobConf);
}

From source file:nthu.scopelab.tsqr.ssvd.BtJob.java

License:Apache License

/**
 * Configures and runs "BtJob" (mapper, combiner and reducer) through the classic
 * {@code org.apache.hadoop.mapred} API and prints the resulting job ID.
 *
 * <p>Any existing content at {@code btPath} is deleted first. The main output is a
 * block-compressed sequence file of {@code IntWritable} / {@code VectorWritable} pairs.
 * A named output for Q is always registered; one for the BBt products is registered
 * only when {@code outputBBtProducts} is set.
 *
 * @param conf              base configuration the job configuration is derived from
 * @param inputPath         input paths handed to {@code FileInputFormat}
 * @param btPath            job output directory (recursively deleted before the run)
 * @param qrfPath           value stored under {@code QmultiplyJob.QRF_DIR}
 * @param k                 value stored under {@code QJob.PROP_K}
 * @param p                 value stored under {@code QJob.PROP_P}
 * @param outerBlockHeight  value stored under {@code PROP_OUTER_PROD_BLOCK_HEIGHT}
 * @param reduceTasks       number of reduce tasks
 * @param outputBBtProducts whether to register the {@code OUTPUT_BBT} named output
 * @param reduceSchedule    comma-separated schedule; only its entry count is used here
 * @param mis               value passed through {@code Checker.checkMis} before it is used
 *                          to derive the number of map tasks
 * @throws Exception if job setup or execution fails
 */
public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p,
        int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis)
        throws Exception {
    // The Q named output is always enabled; the flag is also published in the configuration.
    final boolean emitQ = true;
    final String[] scheduleStages = reduceSchedule.split(",");

    final JobConf jobConf = new JobConf(conf, BtJob.class);
    jobConf.setJobName("BtJob");
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    // Job parameters published through the configuration.
    jobConf.setInt(SCHEDULE_NUM, scheduleStages.length);
    jobConf.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight);
    jobConf.setInt(QJob.PROP_K, k);
    jobConf.setInt(QJob.PROP_P, p);
    jobConf.setBoolean(QmultiplyJob.OUTPUT_Q, emitQ);
    jobConf.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    jobConf.set(QmultiplyJob.QRF_DIR, qrfPath);

    // Clear any previous output, then write block-compressed sequence files.
    final FileSystem fs = FileSystem.get(jobConf);
    fs.delete(btPath, true);
    FileOutputFormat.setOutputPath(jobConf, btPath);
    FileOutputFormat.setCompressOutput(jobConf, true);
    FileOutputFormat.setOutputCompressorClass(jobConf, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(jobConf, CompressionType.BLOCK);

    // Intermediate values are SparseRowBlockWritable; the final values are VectorWritable.
    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(SparseRowBlockWritable.class);
    jobConf.setOutputKeyClass(IntWritable.class);
    jobConf.setOutputValueClass(VectorWritable.class);

    jobConf.setMapperClass(BtMapper.class);
    jobConf.setCombinerClass(OuterProductCombiner.class);
    jobConf.setReducerClass(OuterProductReducer.class);

    // The number of map tasks is recommended by fileGather from the checked "mis" value.
    final fileGather inputs = new fileGather(inputPath, "", fs);
    final int checkedMis = Checker.checkMis(mis, inputs.getInputSize(), fs);
    jobConf.setNumMapTasks(inputs.recNumMapTasks(checkedMis));
    jobConf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(jobConf, inputPath);

    if (emitQ) {
        MultipleOutputs.addNamedOutput(jobConf, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);
    }
    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(jobConf, OUTPUT_BBT, SequenceFileOutputFormat.class,
                IntWritable.class, VectorWritable.class);
    }

    final RunningJob runningJob = JobClient.runJob(jobConf);
    System.out.println("Btjob Job ID: " + runningJob.getJobID().toString());
}

From source file:nthu.scopelab.tsqr.ssvd.itBtJob.java

License:Apache License

/**
 * Configures and runs "itBtJob" (mapper, combiner and reducer) through the classic
 * {@code org.apache.hadoop.mapred} API and prints the resulting job ID.
 *
 * <p>Any existing content at {@code btPath} is deleted first. The main output is a
 * block-compressed sequence file of {@code IntWritable} / {@code VectorWritable} pairs.
 * A named output for Q is always registered; one for the BBt products is registered
 * only when {@code outputBBtProducts} is set.
 *
 * @param conf              base configuration the job configuration is derived from
 * @param inputPath         input paths handed to {@code FileInputFormat}
 * @param btPath            job output directory (recursively deleted before the run)
 * @param qrfPath           value stored under {@code QmultiplyJob.QRF_DIR}
 * @param k                 value stored under {@code QJob.PROP_K}
 * @param p                 value stored under {@code QJob.PROP_P}
 * @param outerBlockHeight  value stored under {@code PROP_OUTER_PROD_BLOCK_HEIGHT}
 * @param reduceTasks       number of reduce tasks
 * @param outputBBtProducts whether to register the {@code OUTPUT_BBT} named output
 * @param reduceSchedule    comma-separated schedule; only its entry count is used here
 * @param mis               value passed through {@code Checker.checkMis} before it is used
 *                          to derive the number of map tasks
 * @throws Exception if job setup or execution fails
 */
public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p,
        int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis)
        throws Exception {
    // The Q named output is always enabled; the flag is also published in the configuration.
    final boolean emitQ = true;
    final int scheduleLength = reduceSchedule.split(",").length;

    final JobConf jobConf = new JobConf(conf, itBtJob.class);
    jobConf.setJobName("itBtJob");
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    // Job parameters published through the configuration.
    jobConf.setInt(SCHEDULE_NUM, scheduleLength);
    jobConf.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight);
    jobConf.setInt(QJob.PROP_K, k);
    jobConf.setInt(QJob.PROP_P, p);
    jobConf.setBoolean(QmultiplyJob.OUTPUT_Q, emitQ);
    jobConf.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    jobConf.set(QmultiplyJob.QRF_DIR, qrfPath);

    // Clear any previous output, then write block-compressed sequence files.
    final FileSystem fs = FileSystem.get(jobConf);
    fs.delete(btPath, true);
    FileOutputFormat.setOutputPath(jobConf, btPath);
    FileOutputFormat.setCompressOutput(jobConf, true);
    FileOutputFormat.setOutputCompressorClass(jobConf, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(jobConf, CompressionType.BLOCK);

    // Intermediate values are SparseRowBlockWritable; the final values are VectorWritable.
    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(SparseRowBlockWritable.class);
    jobConf.setOutputKeyClass(IntWritable.class);
    jobConf.setOutputValueClass(VectorWritable.class);

    jobConf.setMapperClass(BtMapper.class);
    jobConf.setCombinerClass(OuterProductCombiner.class);
    jobConf.setReducerClass(OuterProductReducer.class);

    // The number of map tasks is recommended by fileGather from the checked "mis" value.
    final fileGather inputs = new fileGather(inputPath, "", fs);
    final int checkedMis = Checker.checkMis(mis, inputs.getInputSize(), fs);
    jobConf.setNumMapTasks(inputs.recNumMapTasks(checkedMis));
    jobConf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(jobConf, inputPath);

    if (emitQ) {
        MultipleOutputs.addNamedOutput(jobConf, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);
    }
    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(jobConf, OUTPUT_BBT, SequenceFileOutputFormat.class,
                IntWritable.class, VectorWritable.class);
    }

    final RunningJob runningJob = JobClient.runJob(jobConf);
    System.out.println("itBtJob Job ID: " + runningJob.getJobID().toString());
}

From source file:nthu.scopelab.tsqr.ssvd.itQJob.java

License:Apache License

/**
 * Runs one "itQ-job-N" map-reduce job per entry of {@code reduceSchedule}, chaining
 * each stage's output directory into the next stage's input.
 *
 * <p>Stage 1 reads {@code inputPaths} with {@code QMapper}; every later stage reads the
 * previous stage's output with {@code IdentityMapper}. All stages reduce with
 * {@code QReducer} and write to {@code outputPath + "/iter-r-" + N}, which is deleted
 * before the stage runs.
 *
 * @param conf           base configuration each stage's job configuration is derived from
 * @param inputPaths     input paths of the first stage
 * @param outputPath     parent directory of the per-stage output directories
 * @param reduceSchedule comma-separated reducer counts, one entry per stage
 * @param k              value stored under {@code PROP_K}
 * @param p              value stored under {@code PROP_P}
 * @param seed           value stored under {@code PROP_OMEGA_SEED}
 * @param mis            value passed through {@code Checker.checkMis} on every stage; the
 *                       checked result is carried over to the next stage
 * @throws ClassNotFoundException declared by the original signature
 * @throws InterruptedException   declared by the original signature
 * @throws IOException            if file system access or a job fails
 */
public static void run(Configuration conf, Path[] inputPaths, String outputPath, String reduceSchedule, int k,
        int p, long seed, int mis) throws ClassNotFoundException, InterruptedException, IOException {
    final String[] reducersPerStage = reduceSchedule.split(",");
    final String stageOutputPrefix = outputPath + "/iter-r-";
    // Output directory of the previous stage; read as input by every stage after the first.
    String previousOutput = "";

    for (int stage = 0; stage < reducersPerStage.length; stage++) {
        final boolean firstStage = stage == 0;
        final String stageLabel = Integer.toString(stage + 1);
        final String stageOutput = stageOutputPrefix + stageLabel;

        final JobConf jobConf = new JobConf(conf, itQJob.class);
        jobConf.setJobName("itQ-job-" + stageLabel);
        jobConf.setInputFormat(SequenceFileInputFormat.class);
        jobConf.setOutputFormat(SequenceFileOutputFormat.class);

        if (firstStage) {
            jobConf.setMapperClass(QMapper.class);
        } else {
            jobConf.setMapperClass(IdentityMapper.class);
        }
        jobConf.setReducerClass(QReducer.class);
        jobConf.setOutputKeyClass(IntWritable.class);
        jobConf.setOutputValueClass(LMatrixWritable.class);

        // Gather the "part" files of either the original input or the previous stage.
        final FileSystem fs = FileSystem.get(jobConf);
        final fileGather gatherer;
        if (firstStage) {
            gatherer = new fileGather(inputPaths, "part", fs);
        } else {
            gatherer = new fileGather(new Path(previousOutput), "part", fs);
        }
        final Path[] stageInputs = gatherer.getPaths();

        // "mis" is intentionally overwritten so the checked value carries into later stages.
        mis = Checker.checkMis(mis, gatherer.getInputSize(), fs);
        jobConf.setNumMapTasks(gatherer.recNumMapTasks(mis));
        jobConf.setNumReduceTasks(Integer.parseInt(reducersPerStage[stage]));

        // Job parameters published through the configuration.
        jobConf.setInt(QRFirstJob.COLUMN_SIZE, k + p);
        jobConf.setLong(PROP_OMEGA_SEED, seed);
        jobConf.setInt(PROP_K, k);
        jobConf.setInt(PROP_P, p);

        // Clear any previous output of this stage; output compression is not enabled here.
        fs.delete(new Path(stageOutput), true);
        FileInputFormat.setInputPaths(jobConf, stageInputs);
        FileOutputFormat.setOutputPath(jobConf, new Path(stageOutput));

        // Named output for the first-level Q.
        MultipleOutputs.addNamedOutput(jobConf, QF_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);

        final RunningJob runningJob = JobClient.runJob(jobConf);
        System.out.println("itQJob Job ID: " + runningJob.getJobID().toString());
        previousOutput = stageOutput;
    }
}

From source file:nthu.scopelab.tsqr.ssvd.QJob.java

License:Apache License

/**
 * Runs one "Q-job-N" map-reduce job per entry of {@code reduceSchedule}, chaining each
 * stage's output directory into the next stage's input.
 *
 * <p>Stage 1 reads {@code inputPaths} with {@code QMapper}; every later stage reads the
 * previous stage's output with {@code IdentityMapper}. All stages reduce with
 * {@code QReducer} and write to {@code outputPath + "/iter-r-" + N}, which is deleted
 * before the stage runs.
 *
 * @param conf           base configuration each stage's job configuration is derived from
 * @param inputPaths     input paths of the first stage
 * @param outputPath     parent directory of the per-stage output directories
 * @param reduceSchedule comma-separated reducer counts, one entry per stage
 * @param k              value stored under {@code PROP_K}
 * @param p              value stored under {@code PROP_P}
 * @param seed           value stored under {@code PROP_OMEGA_SEED}
 * @param mis            value passed through {@code Checker.checkMis} on every stage; the
 *                       checked result is carried over to the next stage
 * @throws ClassNotFoundException declared by the original signature
 * @throws InterruptedException   declared by the original signature
 * @throws IOException            if file system access or a job fails
 */
public static void run(Configuration conf, Path[] inputPaths, String outputPath, String reduceSchedule, int k,
        int p, long seed, int mis) throws ClassNotFoundException, InterruptedException, IOException {
    final String[] reducersPerStage = reduceSchedule.split(",");
    final String stageOutputPrefix = outputPath + "/iter-r-";
    // Output directory of the previous stage; read as input by every stage after the first.
    String previousOutput = "";

    for (int stage = 0; stage < reducersPerStage.length; stage++) {
        final boolean firstStage = stage == 0;
        final String stageLabel = Integer.toString(stage + 1);
        final String stageOutput = stageOutputPrefix + stageLabel;

        final JobConf jobConf = new JobConf(conf, QJob.class);
        jobConf.setJobName("Q-job-" + stageLabel);
        jobConf.setInputFormat(SequenceFileInputFormat.class);
        jobConf.setOutputFormat(SequenceFileOutputFormat.class);

        if (firstStage) {
            jobConf.setMapperClass(QMapper.class);
        } else {
            jobConf.setMapperClass(IdentityMapper.class);
        }
        jobConf.setReducerClass(QReducer.class);
        jobConf.setOutputKeyClass(IntWritable.class);
        jobConf.setOutputValueClass(LMatrixWritable.class);

        // Gather the "part" files of either the original input or the previous stage.
        final FileSystem fs = FileSystem.get(jobConf);
        final fileGather gatherer;
        if (firstStage) {
            gatherer = new fileGather(inputPaths, "part", fs);
        } else {
            gatherer = new fileGather(new Path(previousOutput), "part", fs);
        }
        final Path[] stageInputs = gatherer.getPaths();

        // "mis" is intentionally overwritten so the checked value carries into later stages.
        mis = Checker.checkMis(mis, gatherer.getInputSize(), fs);
        jobConf.setNumMapTasks(gatherer.recNumMapTasks(mis));
        jobConf.setNumReduceTasks(Integer.parseInt(reducersPerStage[stage]));

        // Job parameters published through the configuration.
        jobConf.setInt(QRFirstJob.COLUMN_SIZE, k + p);
        jobConf.setLong(PROP_OMEGA_SEED, seed);
        jobConf.setInt(PROP_K, k);
        jobConf.setInt(PROP_P, p);

        // Clear any previous output of this stage; output compression is not enabled here.
        fs.delete(new Path(stageOutput), true);
        FileInputFormat.setInputPaths(jobConf, stageInputs);
        FileOutputFormat.setOutputPath(jobConf, new Path(stageOutput));

        // Named output for the first-level Q.
        MultipleOutputs.addNamedOutput(jobConf, QF_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);

        final RunningJob runningJob = JobClient.runJob(jobConf);
        System.out.println("QJob Job ID: " + runningJob.getJobID().toString());
        previousOutput = stageOutput;
    }
}

From source file:nthu.scopelab.tsqr.ssvd.UJob.java

License:Apache License

/**
 * Configures the map-only "UJob" and submits it through {@code JobClient.submitJob},
 * storing the client in {@code jobclient} and the submitted job's ID in {@code jobid}.
 * {@code JobClient.runJob} is not used, so this method does not itself wait on the job.
 *
 * <p>Any existing content at {@code outputPath} is deleted first. Output is a
 * block-compressed sequence file of {@code LongWritable} / {@code LMatrixWritable}
 * pairs whose base name is {@code OUTPUT_U}.
 *
 * @param conf          base configuration the job configuration is derived from
 * @param inputPathQ    job input path; a directory derived from its string form is also
 *                      scanned for "Q-" files to size the map phase
 * @param inputUHatPath path whose string form is stored under {@code PROP_UHAT_PATH}
 * @param sigmaPath     path whose string form is stored under {@code PROP_SIGMA_PATH}
 * @param outputPath    job output directory (recursively deleted before submission)
 * @param k             value stored under {@code QJob.PROP_K}
 * @param uHalfSigma    when true, {@code PROP_U_HALFSIGMA} is set to "y"
 * @param mis           value passed through {@code Checker.checkMis} before it is used to
 *                      derive the number of map tasks
 * @throws ClassNotFoundException declared by the original signature
 * @throws InterruptedException   declared by the original signature
 * @throws IOException            if file system access or job submission fails
 */
public void start(Configuration conf, Path inputPathQ, Path inputUHatPath, Path sigmaPath, Path outputPath,
        int k, boolean uHalfSigma, int mis) throws ClassNotFoundException, InterruptedException, IOException {
    final JobConf jobConf = new JobConf(conf, UJob.class);
    jobclient = new JobClient(jobConf);
    jobConf.setJobName("UJob");
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    // Map-only job with identical map-output and job-output types.
    jobConf.setMapperClass(MultiplyMapper.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(LMatrixWritable.class);
    jobConf.setMapOutputKeyClass(LongWritable.class);
    jobConf.setMapOutputValueClass(LMatrixWritable.class);

    final FileSystem fs = FileSystem.get(jobConf);
    // NOTE(review): the cut point is one character BEFORE the last "/", so the final
    // character of the parent directory is dropped as well. This looks like it expects
    // inputPathQ to have a specific shape (or is an off-by-one) -- confirm against callers.
    final String qPathText = inputPathQ.toString();
    final Path gatherDir = new Path(qPathText.substring(0, qPathText.lastIndexOf("/") - 1));
    final fileGather qFiles = new fileGather(gatherDir, "Q-", fs);
    final int checkedMis = Checker.checkMis(mis, qFiles.getInputSize(), fs);
    jobConf.setNumMapTasks(qFiles.recNumMapTasks(checkedMis));
    jobConf.setNumReduceTasks(0);

    // Job parameters published through the configuration.
    jobConf.set("mapreduce.output.basename", OUTPUT_U);
    jobConf.set(PROP_UHAT_PATH, inputUHatPath.toString());
    jobConf.set(PROP_SIGMA_PATH, sigmaPath.toString());
    if (uHalfSigma) {
        jobConf.set(PROP_U_HALFSIGMA, "y");
    }
    jobConf.setInt(QJob.PROP_K, k);

    // Clear any previous output, then write block-compressed sequence files.
    fs.delete(outputPath, true);
    FileOutputFormat.setOutputPath(jobConf, outputPath);
    FileOutputFormat.setCompressOutput(jobConf, true);
    FileOutputFormat.setOutputCompressorClass(jobConf, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(jobConf, CompressionType.BLOCK);

    FileInputFormat.setInputPaths(jobConf, inputPathQ);

    jobid = jobclient.submitJob(jobConf).getID();
}

From source file:org.acacia.csr.java.CSRConverter.java

License:Apache License

/**
 * Entry point of the CSR converter: clears the temporary HDFS directories, makes sure
 * the "notinverts" placeholder file exists, and runs the "csr_inverter" job that builds
 * the inverted index.
 *
 * <p>Arguments as consumed below: {@code args[0]} input path, {@code args[1]} value for
 * {@code org.acacia.partitioner.hbase.zookeeper.quorum}, {@code args[2]} value for
 * {@code org.acacia.partitioner.hbase.table}, {@code args[3]} value for
 * {@code org.acacia.partitioner.hbase.contacthost}, {@code args[4]} "vertex-count",
 * {@code args[5]} "zero-flag". If {@code validArgs} rejects them, usage is printed and
 * nothing else happens.
 *
 * @param args command-line arguments, see above
 * @throws Exception if file system access or the job fails
 */
public static void main(String[] args) throws Exception {
    if (!validArgs(args)) {
        printUsage();
        return;
    }
    //These are the temp paths that are created on HDFS
    String dir1 = "/user/miyuru/csrconverter-output";
    String dir2 = "/user/miyuru/csrconverter-output-sorted";

    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());

    System.out.println("Deleting the dir : " + dir1);

    if (fs1.exists(new Path(dir1))) {
        fs1.delete(new Path(dir1), true);
    }

    System.out.println("Done deleting the dir : " + dir1);
    System.out.println("Deleting the dir : " + dir2);
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    Path notinPath = new Path("/user/miyuru/notinverts/notinverts");

    if (!fs1.exists(notinPath)) {
        // create() hands back an open output stream; close it right away so the empty
        // placeholder file is finalized instead of being left open for writing.
        fs1.create(notinPath).close();
    }

    System.out.println("Done deleting the dir : " + dir2);

    //Note on Aug 23 2014: Sometimes after this the mapReduce job hangs. need to see why.
    // NOTE(review): the placeholder stream above used to be left unclosed; whether that
    // contributed to the hang is unconfirmed.

    VertexCounterClient.setDefaultGraphID(args[3], args[2]);

    //First job creates the inverted index

    JobConf conf = new JobConf(CSRConverter.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[1]);
    conf.set("org.acacia.partitioner.hbase.table", args[2]);
    conf.set("org.acacia.partitioner.hbase.contacthost", args[3]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setReducerClass(InvertedReducer.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // Both inputs go through InvertedMapper: the edge list via NLinesInputFormat and the
    // "notinverts" placeholder via TextInputFormat.
    MultipleInputs.addInputPath(conf, new Path(args[0]), NLinesInputFormat.class, InvertedMapper.class);
    MultipleInputs.addInputPath(conf, notinPath, TextInputFormat.class, InvertedMapper.class);
    FileOutputFormat.setOutputPath(conf, new Path(dir1));

    //Also for the moment we turn-off the speculative execution
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    conf.setNumMapTasks(96);
    conf.setNumReduceTasks(96);
    conf.setPartitionerClass(VertexPartitioner.class);
    conf.set("vertex-count", args[4]);
    conf.set("zero-flag", args[5]);
    Job job = new Job(conf, "csr_inverter");
    job.setSortComparatorClass(SortComparator.class);
    job.waitForCompletion(true);
}

From source file:org.acacia.csr.java.LineCount.java

License:Apache License

/**
 * Runs the "line count" job over {@code /user/miyuru/input}, reads the
 * {@code MAP_INPUT_RECORDS} counter as the edge count, writes that number to
 * {@code /tmp/efile} and prints it.
 *
 * <p>The HDFS output directory {@code /user/miyuru/lcout} is deleted first if it exists.
 * The job is configured with {@code Map} and {@code Reduce} but is run with zero reduce
 * tasks.
 *
 * @param args not used
 * @throws Exception if file system access, the job, or writing the result file fails
 */
public static void main(String[] args) throws Exception {
    final String outputDir = "/user/miyuru/lcout";
    final String inputDir = "/user/miyuru/input";

    // Remove the temporary output directory left over from a previous run.
    final FileSystem hdfs = FileSystem.get(new JobConf());
    if (hdfs.exists(new Path(outputDir))) {
        hdfs.delete(new Path(outputDir), true);
    }

    final JobConf jobConf = new JobConf(LineCount.class);
    jobConf.setJobName("LineCount");

    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(jobConf, new Path(inputDir));
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDir));

    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(IntWritable.class);

    jobConf.setMapperClass(Map.class);
    jobConf.setCombinerClass(Reduce.class);
    jobConf.setReducerClass(Reduce.class);

    final Job countJob = new Job(jobConf, "line count");
    countJob.setNumReduceTasks(0);
    countJob.waitForCompletion(true);

    // The edge count is taken from the framework's map-input-record counter.
    final org.apache.hadoop.mapreduce.Counters counters = countJob.getCounters();
    final long edgeCount = counters
            .findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
            .getValue();

    // Replace any previous result file with the fresh count.
    final File resultFile = new File("/tmp/efile");
    if (resultFile.exists()) {
        resultFile.delete();
    }
    final PrintWriter out = new PrintWriter("/tmp/efile", "UTF-8");
    out.println(edgeCount);
    out.flush();
    out.close();

    // An earlier revision subtracted one from the count here to compensate for an extra
    // line it observed per edge-list file; that adjustment is currently disabled.
    System.out.println("======>Edge count is : " + edgeCount);
    System.out.println("------Done Line Count---------------");
}

From source file:org.acacia.csr.java.NotInFinder.java

License:Apache License

/**
 * Runs the "NotInFinder" job, reading {@code /user/miyuru/wcout} and writing text output
 * to {@code /user/miyuru/notinverts}.
 *
 * <p>The output directory is deleted first if it exists. Mapper, combiner, reducer and
 * the output key/value classes are all configured on the {@code JobConf}; only the jar
 * class and the sort comparator are set on the {@code Job} itself.
 *
 * @param args not used
 * @throws Exception if file system access or the job fails
 */
public static void main(String[] args) throws Exception {
    final String inputDir = "/user/miyuru/wcout";
    final String outputDir = "/user/miyuru/notinverts";

    // Remove the temporary output directory left over from a previous run.
    final FileSystem hdfs = FileSystem.get(new JobConf());
    if (hdfs.exists(new Path(outputDir))) {
        hdfs.delete(new Path(outputDir), true);
    }

    final JobConf jobConf = new JobConf();
    jobConf.setNumMapTasks(96);

    jobConf.setInputFormat(NLinesInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(jobConf, new Path(inputDir));
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDir));

    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    // IntSumReducer serves as both combiner and reducer.
    jobConf.setMapperClass(TokenizerMapper.class);
    jobConf.setReducerClass(IntSumReducer.class);
    jobConf.setCombinerClass(IntSumReducer.class);

    final Job finderJob = new Job(jobConf, "NotInFinder");
    finderJob.setJarByClass(WordCount.class);
    finderJob.setSortComparatorClass(SortComparator.class);
    finderJob.waitForCompletion(true);
}

From source file:org.acacia.partitioner.java.EdgeDistributor.java

License:Apache License

/**
 * Runs the "EdgeDistributor" job, which scans the edge list in {@code /user/miyuru/input}
 * and splits the edges into separate files based on the partitioned vertex files,
 * writing under {@code /user/miyuru/edgedistributed-out}.
 *
 * <p>Arguments as consumed below: {@code args[0]} value for
 * {@code org.acacia.partitioner.hbase.zookeeper.quorum}, {@code args[1]} value for
 * {@code org.acacia.partitioner.hbase.table}, {@code args[2]} value for
 * {@code org.acacia.partitioner.index.contacthost}, {@code args[3]} "vert-count",
 * {@code args[4]} "initpartition-id", {@code args[5]} "zero-flag".
 *
 * @param args command-line arguments, see above
 * @throws IOException            if file system access or the job fails
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException declared by the original signature
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    final String inputDir = "/user/miyuru/input";
    final String outputDir = "/user/miyuru/edgedistributed-out";

    // Remove the temporary output directory left over from a previous run.
    final FileSystem hdfs = FileSystem.get(new JobConf());
    if (hdfs.exists(new Path(outputDir))) {
        hdfs.delete(new Path(outputDir), true);
    }

    final JobConf jobConf = new JobConf(EdgeDistributor.class);

    // Settings taken from the command line.
    jobConf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[0]);
    jobConf.set("org.acacia.partitioner.hbase.table", args[1]);
    jobConf.set("org.acacia.partitioner.index.contacthost", args[2]);
    jobConf.set("vert-count", args[3]);
    jobConf.set("initpartition-id", args[4]);
    jobConf.set("zero-flag", args[5]);

    jobConf.setInputFormat(NLinesInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(jobConf, new Path(inputDir));
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDir));

    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setMapperClass(FileMapper.class);
    jobConf.setReducerClass(FileReducer.class);
    // The reducer count must be given explicitly; otherwise only one reduce task is created.
    jobConf.setNumReduceTasks(96);

    // Multi-named output "partition" for the per-partition edge files.
    MultipleOutputs.addMultiNamedOutput(jobConf, "partition", TextOutputFormat.class,
            NullWritable.class, Text.class);

    final Job distributorJob = new Job(jobConf, "EdgeDistributor");
    distributorJob.waitForCompletion(true);

    System.out.println("Done job EdgeDistribution");
}