Example usage for org.apache.hadoop.mapreduce Job submit

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.Job#submit(), taken from open-source projects.

Prototype

public void submit() throws IOException, InterruptedException, ClassNotFoundException 

Document

Submit the job to the cluster and return immediately.
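
Because submit() returns as soon as the job has been handed to the cluster, the driver can poll for completion instead of blocking in waitForCompletion(). The following is a minimal sketch of that pattern; the driver method, MyMapper/MyReducer, and the input/output paths are placeholders for illustration only and do not come from the examples on this page. Note that the examples below use the older new Job(conf) constructor, while Job.getInstance(conf, name) is the non-deprecated factory.

public int submitAndPoll(Configuration conf) throws Exception {
    // Placeholder driver: MyMapper, MyReducer, and the paths are assumptions, not from the examples below.
    Job job = Job.getInstance(conf, "submit-example");
    job.setJarByClass(getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("/in"));
    FileOutputFormat.setOutputPath(job, new Path("/out"));

    job.submit(); // returns immediately; the job keeps running on the cluster
    while (!job.isComplete()) { // poll instead of blocking in waitForCompletion()
        System.out.printf("map %.0f%% reduce %.0f%%%n", job.mapProgress() * 100, job.reduceProgress() * 100);
        Thread.sleep(5000);
    }
    return job.isSuccessful() ? 0 : 1;
}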

Usage

From source file:org.qcri.pca.ReconstructionErrJob.java

/**
 * Refer to {@link ReconstructionErrJob} for an explanation of the job.
 * @param conf
 *          the configuration
 * @param yPath
 *          the path to input matrix Y
 * @param y2xPath
 *          the path to in-memory matrix Y2X, where X = Y * Y2X
 * @param yCols
 *          the number of columns in Y
 * @param xCols
 *          the number of columns in X
 * @param cPath
 *          the path to in-memory matrix C, where ReconY = Xc * C'
 * @param zmPath
 *          the path to vector Zm, where Zm = Ym * Y2X * C' - Ym
 * @param ymPath
 *          the path to the mean vector Ym
 * @param outPath
 *          the output path
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void run(Configuration conf, Path yPath, Path y2xPath, int yCols, int xCols, Path cPath, String zmPath,
        String ymPath, Path outPath, final float ERR_SAMPLE_RATE)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXY2X, y2xPath.toString());
    conf.set(RECONSTRUCTIONMATRIX, cPath.toString());
    conf.set(ZMPATH, zmPath);
    conf.set(YMPATH, ymPath);
    conf.setInt(YCOLS, yCols);
    conf.setInt(XCOLS, xCols);
    conf.set(ERRSAMPLERATE, "" + ERR_SAMPLE_RATE);
    FileSystem fs = FileSystem.get(yPath.toUri(), conf);
    yPath = fs.makeQualified(yPath);
    outPath = fs.makeQualified(outPath);
    Job job = new Job(conf);
    FileInputFormat.addInputPath(job, yPath);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setJobName("ReconErrJob-" + yPath.getName());
    job.setJarByClass(ReconstructionErrJob.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    // submit() returns immediately; waitForCompletion() then only monitors the already-submitted job
    job.submit();
    job.waitForCompletion(true);
}

From source file:org.qcri.pca.VarianceJob.java

public void run(Configuration conf, Path yPath, String ymPath, String matrixY2XDir, String xmPath,
        String matrixCDir, Path outPath) throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXY2X, matrixY2XDir);
    conf.set(MATRIXC, matrixCDir);
    conf.set(XMPATH, xmPath);
    conf.set(YMPATH, ymPath);
    FileSystem fs = FileSystem.get(yPath.toUri(), conf);
    yPath = fs.makeQualified(yPath);
    outPath = fs.makeQualified(outPath);
    Job job = new Job(conf);
    FileInputFormat.addInputPath(job, yPath);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setJobName("VarianceJob-" + yPath.getName());
    job.setJarByClass(VarianceJob.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.submit();
    job.waitForCompletion(true);
}

From source file:org.sample.hadoop.WordCountV2.java

License:Open Source License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJobName("wordcount");
    job.setJarByClass(this.getClass());
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class); // without an explicit reducer, the identity Reducer would emit per-mapper partial counts
    job.setMapperClass(TokenizerMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), job.getConfiguration());
            job.getConfiguration().setBoolean("wordcount.skip.patterns", true);
        } else {
            other_args.add(args[i]);
        }
    }
    FileInputFormat.addInputPath(job, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(job, new Path(other_args.get(1)));
    job.submit(); // return immediately; the job keeps running on the cluster
    return 0;
}

From source file:org.seqdoop.hadoop_bam.cli.plugins.FixMate.java

License:Open Source License

@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("fixmate :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("fixmate :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final ValidationStringency stringency = Utils.toStringency(
            parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()),
            "fixmate");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    final boolean globalSort = parser.getBoolean(sortOpt);
    if (globalSort)
        Utils.setHeaderMergerSortOrder(conf, SAMFileHeader.SortOrder.queryname);

    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        if (globalSort)
            Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(FixMate.class);
        job.setMapperClass(FixMateMapper.class);
        job.setReducerClass(FixMateReducer.class);

        if (!parser.getBoolean(noCombinerOpt))
            job.setCombinerClass(FixMateReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(AnySAMInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        if (globalSort) {
            job.setPartitionerClass(TotalOrderPartitioner.class);

            System.out.println("fixmate :: Sampling...");
            t.start();

            InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                    new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                            Math.max(100, reduceTasks)));

            System.out.printf("fixmate :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());
        }

        job.submit();

        System.out.println("fixmate :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("fixmate :: Job failed.");
            return 4;
        }

        System.out.printf("fixmate :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());

    } catch (IOException e) {
        System.err.printf("fixmate :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "fixmate");
        } catch (IOException e) {
            System.err.printf("fixmate :: Output merging failed: %s\n", e);
            return 5;
        }
    return 0;
}

From source file:org.seqdoop.hadoop_bam.cli.plugins.Sort.java

License:Open Source License

@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("sort :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("sort :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final ValidationStringency stringency = Utils.toStringency(
            parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()), "sort");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    Utils.setHeaderMergerSortOrder(conf, SortOrder.coordinate);
    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(Sort.class);
        job.setMapperClass(Mapper.class);
        job.setReducerClass(SortReducer.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(SortInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        job.setPartitionerClass(TotalOrderPartitioner.class);

        System.out.println("sort :: Sampling...");
        t.start();

        InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                        Math.max(100, reduceTasks)));

        System.out.printf("sort :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());

        job.submit();

        System.out.println("sort :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("sort :: Job failed.");
            return 4;
        }

        System.out.printf("sort :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());

    } catch (IOException e) {
        System.err.printf("sort :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "sort");
        } catch (IOException e) {
            System.err.printf("sort :: Output merging failed: %s\n", e);
            return 5;
        }
    return 0;
}

From source file:org.seqdoop.hadoop_bam.examples.ReadFilter.java

License:Open Source License

public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    //vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
    //The path to the header for the output file, make sure this is correctly specified!
    conf.set(MyOutputFormat.HEADER_FROM_FILE, "/user/ubuntu/header.bam");
    //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

    final Job job = new Job(conf);

    job.setJarByClass(ReadFilter.class);
    job.setMapperClass(ReadFilterMapper.class);
    job.setReducerClass(ReadFilterReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SAMRecordWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SAMRecordWritable.class);

    job.setInputFormatClass(AnySAMInputFormat.class);
    job.setOutputFormatClass(ReadFilter.MyOutputFormat.class);

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(job, new Path(args[0]));

    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.submit();

    if (!job.waitForCompletion(true)) {
        System.err.println("sort :: Job failed.");
        return 1;
    }

    return 0;
}

From source file:org.seqdoop.hadoop_bam.examples.TestBAM.java

License:Open Source License

public int run(String[] args) throws Exception {
    final Configuration conf = getConf();

    conf.set(MyOutputFormat.HEADER_FROM_FILE, args[0]);

    final Job job = new Job(conf);

    job.setJarByClass(TestBAM.class);
    job.setMapperClass(TestBAMMapper.class);
    job.setReducerClass(TestBAMReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SAMRecordWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SAMRecordWritable.class);

    job.setInputFormatClass(AnySAMInputFormat.class);
    job.setOutputFormatClass(TestBAM.MyOutputFormat.class);

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0]));

    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.submit();

    if (!job.waitForCompletion(true)) {
        System.err.println("sort :: Job failed.");
        return 1;
    }

    return 0;
}

From source file:org.seqdoop.hadoop_bam.examples.TestVCF.java

License:Open Source License

public int run(String[] args) throws Exception {
    final Configuration conf = getConf();

    conf.set(VCFOutputFormat.OUTPUT_VCF_FORMAT_PROPERTY, "VCF");
    conf.set(MyVCFOutputFormat.INPUT_PATH_PROP, args[0]);

    final Job job = new Job(conf);

    job.setJarByClass(TestVCF.class);
    job.setMapperClass(TestVCFMapper.class);
    job.setReducerClass(TestVCFReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VariantContextWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VariantContextWritable.class);

    job.setInputFormatClass(VCFInputFormat.class);
    job.setOutputFormatClass(MyVCFOutputFormat.class);

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0]));

    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.submit();

    if (!job.waitForCompletion(true)) {
        System.err.println("sort :: Job failed.");
        return 1;
    }

    return 0;
}

From source file:org.springframework.data.hadoop.mapreduce.JobExecutor.java

License:Apache License

protected Collection<Job> startJobs(final JobListener listener) {
    final Collection<Job> jbs = findJobs();

    final List<Job> started = new ArrayList<Job>();

    taskExecutor.execute(new Runnable() {
        @Override
        public void run() {

            Object listenerInit = null;
            if (listener != null) {
                listenerInit = listener.beforeAction();
            }

            try {

                for (final Job job : jbs) {
                    boolean success = false;
                    try {
                        // job is already running - ignore it
                        if (JobUtils.getStatus(job).isStarted()) {
                            log.info("Job [" + job.getJobName() + "] already started; skipping it...");
                            break;
                        }

                        log.info("Starting job [" + job.getJobName() + "]");
                        synchronized (started) {
                            started.add(job);
                        }
                        if (!waitForCompletion) {
                            success = true;
                            job.submit();
                        } else {
                            success = job.waitForCompletion(verbose);
                            log.info("Completed job [" + job.getJobName() + "]");
                            if (listener != null) {
                                listener.jobFinished(job);
                            }

                        }
                    } catch (InterruptedException ex) {
                        log.warn("Job [" + job.getJobName() + "] killed");
                        throw new IllegalStateException(ex);
                    } catch (Exception ex) {
                        log.warn("Cannot start job [" + job.getJobName() + "]", ex);
                        throw new IllegalStateException(ex);
                    }

                    if (!success) {
                        if (!shuttingDown) {
                            JobStatus status = JobUtils.getStatus(job);
                            if (JobStatus.KILLED == status) {
                                throw new IllegalStateException("Job " + job.getJobName() + "] killed");
                            } else {
                                throw new IllegalStateException(
                                        "Job [" + job.getJobName() + "] failed to start; status=" + status);
                            }
                        } else {
                            log.info("Job [" + job.getJobName() + "] killed by shutdown");
                        }
                    }
                }
            } finally {
                if (listener != null) {
                    listener.afterAction(listenerInit);
                }
            }
        }
    });

    return started;
}

From source file:parquet.hadoop2.TestInputOutputFormat.java

License:Apache License

@Test
public void testReadWrite() throws IOException, ClassNotFoundException, InterruptedException {
    final Configuration conf = new Configuration();
    final Path inputPath = new Path("src/test/java/parquet/hadoop2/TestInputOutputFormat.java");
    final Path parquetPath = new Path("target/test/hadoop2/example/TestInputOutputFormat/parquet");
    final Path outputPath = new Path("target/test/hadoop2/example/TestInputOutputFormat/out");
    final FileSystem fileSystem = parquetPath.getFileSystem(conf);
    fileSystem.delete(parquetPath, true);
    fileSystem.delete(outputPath, true);
    {
        final Job job = new Job(conf, "write");
        TextInputFormat.addInputPath(job, inputPath);
        job.setInputFormatClass(TextInputFormat.class);
        job.setNumReduceTasks(0);
        ExampleOutputFormat.setCompression(job, CompressionCodecName.GZIP);
        ExampleOutputFormat.setOutputPath(job, parquetPath);
        job.setOutputFormatClass(ExampleOutputFormat.class);
        job.setMapperClass(TestInputOutputFormat.MyMapper.class);
        ExampleOutputFormat.setSchema(job, MessageTypeParser.parseMessageType(
                "message example {\n" + "required int32 line;\n" + "required binary content;\n" + "}"));
        job.submit();
        waitForJob(job);
    }
    {
        final Job job = new Job(conf, "read");
        job.setInputFormatClass(ExampleInputFormat.class);
        ExampleInputFormat.setInputPaths(job, parquetPath);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, outputPath);
        job.setMapperClass(TestInputOutputFormat.MyMapper2.class);
        job.setNumReduceTasks(0);
        job.submit();
        waitForJob(job);
    }

    final BufferedReader in = new BufferedReader(new FileReader(new File(inputPath.toString())));
    final BufferedReader out = new BufferedReader(
            new FileReader(new File(outputPath.toString(), "part-m-00000")));
    String lineIn;
    String lineOut = null;
    int lineNumber = 0;
    while ((lineIn = in.readLine()) != null && (lineOut = out.readLine()) != null) {
        ++lineNumber;
        lineOut = lineOut.substring(lineOut.indexOf("\t") + 1);
        assertEquals("line " + lineNumber, lineIn, lineOut);
    }
    assertNull("line " + lineNumber, lineIn);
    assertNull("line " + lineNumber, out.readLine());
    assertTrue(lineNumber > 0);
    in.close();
    out.close();
}