Example usage for org.apache.hadoop.mapreduce Job submit

List of usage examples for org.apache.hadoop.mapreduce Job submit

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.Job.submit().

Prototype

public void submit() throws IOException, InterruptedException, ClassNotFoundException 

Document

Submit the job to the cluster and return immediately.
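
The project examples below come from real code bases. As a minimal, self-contained warm-up, the following sketch shows the non-blocking pattern that submit() enables; the class name, input/output paths and the 5-second polling interval are placeholders and are not taken from any project quoted on this page (no mapper or reducer is configured, so the identity classes are used).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SubmitExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "submit-example");
        job.setJarByClass(SubmitExample.class);
        // No mapper/reducer set: the identity classes are enough to
        // illustrate the submit/monitor cycle.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.submit();                   // returns as soon as the job is handed to the cluster
        while (!job.isComplete()) {     // the caller is responsible for tracking completion
            Thread.sleep(5000);
        }
        System.exit(job.isSuccessful() ? 0 : 1);
    }
}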

Usage

From source file:edu.umn.cs.spatialHadoop.visualization.SingleLevelPlot.java

License:Open Source License

/**
 * Generates a single level using a MapReduce job and returns the created job.
 * @param inFiles
 * @param outFile
 * @param plotterClass
 * @param params
 * @return the created job
 * @throws IOException
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public static Job plotMapReduce(Path[] inFiles, Path outFile, Class<? extends Plotter> plotterClass,
        OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    Plotter plotter;
    try {
        plotter = plotterClass.newInstance();
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating rastierizer", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating rastierizer", e);
    }

    Job job = new Job(params, "SingleLevelPlot");
    job.setJarByClass(SingleLevelPlot.class);
    job.setJobName("SingleLevelPlot");
    // Set plotter
    Configuration conf = job.getConfiguration();
    Plotter.setPlotter(conf, plotterClass);
    // Set input file MBR
    Rectangle inputMBR = (Rectangle) params.getShape("mbr");
    Rectangle drawRect = (Rectangle) params.getShape("rect");
    if (inputMBR == null)
        inputMBR = drawRect != null ? drawRect : FileMBR.fileMBR(inFiles, params);
    OperationsParams.setShape(conf, InputMBR, inputMBR);
    if (drawRect != null)
        OperationsParams.setShape(conf, SpatialInputFormat3.InputQueryRange, drawRect);

    // Adjust width and height if aspect ratio is to be kept
    int imageWidth = conf.getInt("width", 1000);
    int imageHeight = conf.getInt("height", 1000);
    if (params.getBoolean("keepratio", true)) {
        // Adjust width and height to maintain aspect ratio
        if (inputMBR.getWidth() / inputMBR.getHeight() > (double) imageWidth / imageHeight) {
            // Fix width and change height
            imageHeight = (int) (inputMBR.getHeight() * imageWidth / inputMBR.getWidth());
            // Make divisible by two for compatibility with ffmpeg
            if (imageHeight % 2 == 1)
                imageHeight--;
            conf.setInt("height", imageHeight);
        } else {
            imageWidth = (int) (inputMBR.getWidth() * imageHeight / inputMBR.getHeight());
            conf.setInt("width", imageWidth);
        }
    }

    boolean merge = conf.getBoolean("merge", true);
    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inFiles);
    if (conf.getBoolean("output", true)) {
        if (merge) {
            job.setOutputFormatClass(CanvasOutputFormat.class);
            conf.setClass("mapred.output.committer.class", CanvasOutputFormat.ImageWriterOld.class,
                    org.apache.hadoop.mapred.OutputCommitter.class);
        } else {
            job.setOutputFormatClass(ImageOutputFormat.class);
        }
        CanvasOutputFormat.setOutputPath(job, outFile);
    } else {
        job.setOutputFormatClass(NullOutputFormat.class);
    }

    // Set mapper and reducer based on the partitioning scheme
    String partition = conf.get("partition", "none");
    ClusterStatus clusterStatus = new JobClient(new JobConf()).getClusterStatus();
    if (partition.equalsIgnoreCase("none")) {
        LOG.info("Using no-partition plot");
        job.setMapperClass(NoPartitionPlotMap.class);
        job.setCombinerClass(NoPartitionPlotCombine.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(plotter.getCanvasClass());
        if (merge) {
            int numSplits = new SpatialInputFormat3().getSplits(job).size();
            job.setReducerClass(NoPartitionPlotReduce.class);
            // Set number of reduce tasks according to cluster status
            int maxReduce = Math.max(1, clusterStatus.getMaxReduceTasks() * 7 / 8);
            job.setNumReduceTasks(Math.max(1, Math.min(maxReduce, numSplits / maxReduce)));
        } else {
            job.setNumReduceTasks(0);
        }
    } else {
        LOG.info("Using repartition plot");
        Partitioner partitioner;
        if (partition.equals("pixel")) {
            // Special case for pixel level partitioning as it depends on the
            // visualization parameters
            partitioner = new GridPartitioner(inputMBR, imageWidth, imageHeight);
        } else if (partition.equals("grid")) {
            int numBlocks = 0;
            for (Path in : inFiles) {
                FileSystem fs = in.getFileSystem(params);
                long size = FileUtil.getPathSize(fs, in);
                long blockSize = fs.getDefaultBlockSize(in);
                numBlocks += Math.ceil(size / (double) blockSize);
            }
            int numPartitions = numBlocks * 1000;
            int gridSize = (int) Math.ceil(Math.sqrt(numPartitions));
            partitioner = new GridPartitioner(inputMBR, gridSize, gridSize);
        } else {
            // Use a standard partitioner as created by the indexer
            partitioner = Indexer.createPartitioner(inFiles, outFile, conf, partition);
        }
        Shape shape = params.getShape("shape");
        job.setMapperClass(RepartitionPlotMap.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(shape.getClass());
        job.setReducerClass(RepartitionPlotReduce.class);
        // Set number of reducers according to cluster size
        job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks() * 9 / 10));
        Partitioner.setPartitioner(conf, partitioner);
    }

    // Use multithreading in case the job is running locally
    conf.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    // Start the job
    if (params.getBoolean("background", false)) {
        // Run in background
        job.submit();
    } else {
        job.waitForCompletion(params.getBoolean("verbose", false));
    }
    return job;
}
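
A hypothetical invocation of plotMapReduce, assuming an OperationsParams instance already parsed from the command line; the HDFS paths and the MyPlotter class are placeholders, not names from SpatialHadoop:

// Hypothetical caller; "params" is an existing OperationsParams, MyPlotter extends Plotter.
params.setBoolean("background", true);   // submit() and return without blocking
Job job = SingleLevelPlot.plotMapReduce(
        new Path[] { new Path("hdfs:///data/lakes") },
        new Path("hdfs:///out/lakes.png"),
        MyPlotter.class,
        params);
System.out.println("Submitted plot job " + job.getJobID());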

From source file:edu.umn.cs.sthadoop.trajectory.TrajectoryOverlap.java

License:Open Source License

public static Job rangeQueryMapReduce(Path inFile, Path outFile, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Use the built-in range filter of the input format
    params.set(SpatialInputFormat3.InputQueryRange, params.get("rect"));
    // Use multithreading in case it is running locally
    params.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    Job job = new Job(params, "Traj-KNN-distance");
    job.setJarByClass(RangeQuery.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inFile);

    job.setMapperClass(RangeQueryMap.class);

    if (params.getBoolean("output", true) && outFile != null) {
        job.setOutputFormatClass(TextOutputFormat3.class);
        TextOutputFormat3.setOutputPath(job, outFile);
    } else {
        // Skip writing the output for the sake of debugging
        job.setOutputFormatClass(NullOutputFormat.class);
    }
    // Submit the job
    if (!params.getBoolean("background", false)) {
        job.waitForCompletion(false);
    } else {
        job.submit();
    }
    return job;
}
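
Note that the boolean result of waitForCompletion(false) is discarded above, so a caller that cares about the outcome has to inspect the returned Job. A sketch of such a caller, where inFile, outFile and params are placeholders:

// Hypothetical caller; inFile, outFile and params are placeholders.
Job job = TrajectoryOverlap.rangeQueryMapReduce(inFile, outFile, params);
if (!params.getBoolean("background", false) && !job.isSuccessful()) {
    throw new IOException("Range query job " + job.getJobID() + " failed");
}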

From source file:ExceptionLicenses.Driver.java

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("usage: [input] [output]");
        System.exit(-1);
    }
    Job job = Job.getInstance(new Configuration());
    // Key and value output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(LiecensesMapper.class);
    job.setReducerClass(LiecensesReducer.class);

    job.setInputFormatClass(LiecenseInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setJarByClass(Driver.class);
    job.submit();

}
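
This driver calls submit() and lets main() return without checking the outcome; the job keeps running on the cluster, but the process exit code says nothing about its result. Where the exit status should reflect the job result, the usual blocking variant (not part of the original Driver) looks like this:

// Blocking variant of the last two statements of main(), shown for contrast.
job.setJarByClass(Driver.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);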

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.Summarize.java

License:Open Source License

private boolean runSummary(Path bamPath) throws IOException, ClassNotFoundException, InterruptedException {
    final Configuration conf = getConf();
    Utils.configureSampling(wrkDir, bamPath.getName(), conf);
    final Job job = new Job(conf);

    job.setJarByClass(Summarize.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(SummarizeReducer.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Range.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(RangeCount.class);

    job.setInputFormatClass(SummarizeInputFormat.class);
    job.setOutputFormatClass(SummarizeOutputFormat.class);

    FileInputFormat.setInputPaths(job, bamPath);
    FileOutputFormat.setOutputPath(job, wrkDir);

    job.setPartitionerClass(TotalOrderPartitioner.class);

    System.out.println("summarize :: Sampling...");
    t.start();

    InputSampler.<LongWritable, Range>writePartitionFile(job,
            new InputSampler.RandomSampler<LongWritable, Range>(0.01, 10000, Math.max(100, reduceTasks)));

    System.out.printf("summarize :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());

    for (String lvl : levels) {
        MultipleOutputs.addNamedOutput(job, getSummaryName(lvl, false), SummarizeOutputFormat.class,
                NullWritable.class, Range.class);
        MultipleOutputs.addNamedOutput(job, getSummaryName(lvl, true), SummarizeOutputFormat.class,
                NullWritable.class, Range.class);
    }

    job.submit();

    System.out.println("summarize :: Waiting for job completion...");
    t.start();

    if (!job.waitForCompletion(verbose)) {
        System.err.println("summarize :: Job failed.");
        return false;
    }

    System.out.printf("summarize :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());
    return true;
}
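
The submit()-then-waitForCompletion() sequence above is valid because waitForCompletion() only submits the job if it is still in the DEFINE state; on an already-submitted job it simply monitors progress until the job finishes. Condensed, the pattern is:

job.submit();                                 // non-blocking: the job starts on the cluster
System.out.println("summarize :: Waiting for job completion...");
boolean ok = job.waitForCompletion(verbose);  // safe after submit(): here it only monitors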

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java

License:Open Source License

static Job sortOne(Configuration conf, Path inputFile, Path outputDir, String commandName,
        String samplingInfo) throws IOException, ClassNotFoundException, InterruptedException {
    conf.set(Utils.WORK_FILENAME_PROPERTY, inputFile.getName());
    Utils.configureSampling(outputDir, inputFile.getName(), conf);
    final Job job = new Job(conf);

    job.setJarByClass(Summarize.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(SortReducer.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SortInputFormat.class);
    job.setOutputFormatClass(SortOutputFormat.class);

    FileInputFormat.setInputPaths(job, inputFile);
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setPartitionerClass(TotalOrderPartitioner.class);

    final Timer t = new Timer();

    System.out.printf("%s :: Sampling%s...\n", commandName, samplingInfo);
    t.start();

    InputSampler.<LongWritable, Text>writePartitionFile(job, new InputSampler.SplitSampler<LongWritable, Text>(
            Math.max(1 << 16, conf.getInt("mapred.reduce.tasks", 1)), 10));

    System.out.printf("%s :: Sampling complete in %d.%03d s.\n", commandName, t.stopS(), t.fms());
    job.submit();
    return job;
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.FixMate.java

License:Open Source License

@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("fixmate :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("fixmate :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final SAMFileReader.ValidationStringency stringency = Utils.toStringency(parser.getOptionValue(
            stringencyOpt, SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY.toString()), "fixmate");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    final boolean globalSort = parser.getBoolean(sortOpt);
    if (globalSort)
        Utils.setHeaderMergerSortOrder(conf, SAMFileHeader.SortOrder.queryname);

    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        if (globalSort)
            Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(FixMate.class);
        job.setMapperClass(FixMateMapper.class);
        job.setReducerClass(FixMateReducer.class);

        if (!parser.getBoolean(noCombinerOpt))
            job.setCombinerClass(FixMateReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(AnySAMInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        if (globalSort) {
            job.setPartitionerClass(TotalOrderPartitioner.class);

            System.out.println("fixmate :: Sampling...");
            t.start();

            InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                    new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                            Math.max(100, reduceTasks)));

            System.out.printf("fixmate :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());
        }

        job.submit();

        System.out.println("fixmate :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("fixmate :: Job failed.");
            return 4;
        }

        System.out.printf("fixmate :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());

    } catch (IOException e) {
        System.err.printf("fixmate :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "fixmate");
        } catch (IOException e) {
            System.err.printf("fixmate :: Output merging failed: %s\n", e);
            return 5;
        }
    return 0;
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.Sort.java

License:Open Source License

@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("sort :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("sort :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final SAMFileReader.ValidationStringency stringency = Utils.toStringency(parser.getOptionValue(
            stringencyOpt, SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY.toString()), "sort");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    Utils.setHeaderMergerSortOrder(conf, SortOrder.coordinate);
    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(Sort.class);
        job.setMapperClass(Mapper.class);
        job.setReducerClass(SortReducer.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(SortInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        job.setPartitionerClass(TotalOrderPartitioner.class);

        System.out.println("sort :: Sampling...");
        t.start();

        InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                        Math.max(100, reduceTasks)));

        System.out.printf("sort :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());

        job.submit();

        System.out.println("sort :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("sort :: Job failed.");
            return 4;
        }

        System.out.printf("sort :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());

    } catch (IOException e) {
        System.err.printf("sort :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "sort");
        } catch (IOException e) {
            System.err.printf("sort :: Output merging failed: %s\n", e);
            return 5;
        }
    return 0;
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.VCFSort.java

License:Open Source License

@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("vcf-sort :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("vcf-sort :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    Path wrkDir = new Path(args.get(0));
    final Path inPath = new Path(args.get(1));

    final Configuration conf = getConf();

    VCFFormat vcfFormat = null;

    final String f = (String) parser.getOptionValue(formatOpt);
    if (f != null) {
        try {
            vcfFormat = VCFFormat.valueOf(f.toUpperCase(Locale.ENGLISH));
        } catch (IllegalArgumentException e) {
            System.err.printf("%s :: invalid format '%s'\n", getCommandName(), f);
            return 3;
        }
    }
    if (vcfFormat == null)
        vcfFormat = outPath == null ? VCFFormat.BCF : VCFFormat.inferFromFilePath(outPath);

    conf.setBoolean(VCFInputFormat.TRUST_EXTS_PROPERTY, !parser.getBoolean(noTrustExtsOpt));

    conf.setBoolean(KeyIgnoringVCFOutputFormat.WRITE_HEADER_PROPERTY, outPath == null);

    conf.set(VCFOutputFormat.OUTPUT_VCF_FORMAT_PROPERTY, vcfFormat.toString());

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inPath : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    conf.set(SortOutputFormat.INPUT_PATH_PROP, inPath.toString());

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(VCFSort.class);
        job.setMapperClass(Mapper.class);
        job.setReducerClass(VCFSortReducer.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(VariantContextWritable.class);

        job.setInputFormatClass(VCFInputFormat.class);
        job.setOutputFormatClass(SortOutputFormat.class);

        FileInputFormat.addInputPath(job, inPath);
        FileOutputFormat.setOutputPath(job, wrkDir);

        job.setPartitionerClass(TotalOrderPartitioner.class);

        System.out.println("vcf-sort :: Sampling...");
        t.start();

        InputSampler.<LongWritable, VariantContextWritable>writePartitionFile(job,
                new InputSampler.RandomSampler<LongWritable, VariantContextWritable>(0.01, 10000,
                        Math.max(100, reduceTasks)));

        System.out.printf("vcf-sort :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());

        job.submit();

        System.out.println("vcf-sort :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("vcf-sort :: Job failed.");
            return 4;
        }

        System.out.printf("vcf-sort :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());

    } catch (IOException e) {
        System.err.printf("vcf-sort :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            System.out.println("vcf-sort :: Merging output...");
            t.start();

            final OutputStream outs = outPath.getFileSystem(conf).create(outPath);

            // First, place the VCF or BCF header.

            final WrapSeekable ins = WrapSeekable.openPath(conf, inPath);
            final VCFHeader header = VCFHeaderReader.readHeaderFrom(ins);
            ins.close();

            final VariantContextWriter writer;

            switch (vcfFormat) {
            case VCF:
                writer = VariantContextWriterFactory.create(new FilterOutputStream(outs) {
                    @Override
                    public void close() throws IOException {
                        this.out.flush();
                    }
                }, null, VariantContextWriterFactory.NO_OPTIONS);
                break;

            case BCF:
                writer = VariantContextWriterFactory
                        .create(new FilterOutputStream(new BlockCompressedOutputStream(outs, null)) {
                            @Override
                            public void close() throws IOException {
                                this.out.flush();
                            }
                        }, null, EnumSet.of(Options.FORCE_BCF));
                break;

            default:
                assert false;
                writer = null;
                break;
            }

            writer.writeHeader(header);
            writer.close();

            // Then, the actual VCF or BCF contents.
            Utils.mergeInto(outs, wrkDir, "", "", conf, "vcf-sort");

            // And if BCF, the BGZF terminator.
            if (vcfFormat == VCFFormat.BCF)
                outs.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);

            outs.close();

            System.out.printf("vcf-sort :: Merging complete in %d.%03d s.\n", t.stopS(), t.fms());

        } catch (IOException e) {
            System.err.printf("vcf-sort :: Output merging failed: %s\n", e);
            return 5;
        }
    return 0;
}

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.MapReduceUtils.java

License:LGPL

/**
 * Wait for the completion of a job.
 * @param job the job to submit
 * @param jobDescription the description of the job
 * @param waitTimeInMillis waiting time in milliseconds between two checks of
 *          job completion
 * @param status step status
 * @param counterGroup group of the counter to log
 * @throws EoulsanException if the job fails or if an exception occurs while
 *           submitting the job or waiting for it to end
 */
public static void submitAndWaitForJob(final Job job, final String jobDescription, final int waitTimeInMillis,
        final TaskStatus status, final String counterGroup) throws EoulsanException {

    if (job == null) {
        throw new NullPointerException("The job is null");
    }

    if (jobDescription == null) {
        throw new NullPointerException("The jobDescription is null");
    }

    try {

        // Set the description of the context
        status.setDescription(job.getJobName());

        // Submit the job
        job.submit();

        // Add the Hadoop job to the list of jobs to kill if the workflow fails
        HadoopJobEmergencyStopTask.addHadoopJobEmergencyStopTask(job);

        // Wait for the completion of the job (non-verbose mode)
        job.waitForCompletion(false);

        // Remove the Hadoop job from the list of jobs to kill if the workflow fails
        HadoopJobEmergencyStopTask.removeHadoopJobEmergencyStopTask(job);

        // Check if the job has been successfully executed
        if (!job.isSuccessful()) {

            status.setProgressMessage("FAILED");

            throw new EoulsanException("Fail of the Hadoop job: " + job.getJobFile());
        }

        // Set the counters
        status.setCounters(new HadoopReporter(job.getCounters()), counterGroup);

    } catch (ClassNotFoundException | InterruptedException | IOException e) {
        throw new EoulsanException(e);
    }
}
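
A hypothetical call of this helper, assuming a configured Job and a TaskStatus are already in scope; the job description, wait time and counter group name are placeholders:

// Hypothetical invocation; job, status and the counter group name are placeholders.
MapReduceUtils.submitAndWaitForJob(job, "read filtering", 5000, status, "filter_counters");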

From source file:gobblin.compaction.mapreduce.MRCompactorJobRunner.java

License:Apache License

private void submitAndWait(Job job) throws ClassNotFoundException, IOException, InterruptedException {
    job.submit();
    MRCompactor.addRunningHadoopJob(this.dataset, job);
    LOG.info(String.format("MR job submitted for dataset %s, input %s, url: %s", this.dataset, getInputPaths(),
            job.getTrackingURL()));
    while (!job.isComplete()) {
        if (this.policy == Policy.ABORT_ASAP) {
            LOG.info(String.format("MR job for dataset %s, input %s killed due to input data incompleteness."
                    + " Will try again later", this.dataset, getInputPaths()));
            job.killJob();
            return;
        }
        Thread.sleep(MR_JOB_CHECK_COMPLETE_INTERVAL_MS);
    }
    if (!job.isSuccessful()) {
        throw new RuntimeException(String.format("MR job failed for topic %s, input %s, url: %s", this.dataset,
                getInputPaths(), job.getTrackingURL()));
    }
}