List of usage examples for org.apache.hadoop.mapreduce.Job.submit()
public void submit() throws IOException, InterruptedException, ClassNotFoundException
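As a quick orientation before the project examples below, here is a minimal, self-contained sketch of the submit-then-poll pattern. It assumes Hadoop 2.x; WordMapper, WordReducer, and the command-line paths are hypothetical placeholders, not taken from any example on this page.

// Minimal sketch of Job.submit(): configure, submit asynchronously, then poll.
// WordMapper and WordReducer are hypothetical placeholder classes.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SubmitExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "submit-example");
        job.setJarByClass(SubmitExample.class);
        job.setMapperClass(WordMapper.class);   // hypothetical mapper
        job.setReducerClass(WordReducer.class); // hypothetical reducer
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Unlike waitForCompletion(), submit() returns immediately;
        // the job keeps running in the background.
        job.submit();

        // Poll for completion ourselves.
        while (!job.isComplete()) {
            Thread.sleep(5000);
        }
        System.exit(job.isSuccessful() ? 0 : 1);
    }
}

The examples below show the same call in real projects: some submit and return the Job to the caller, others submit and then block with waitForCompletion() or a manual polling loop.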
From source file:edu.umn.cs.spatialHadoop.visualization.SingleLevelPlot.java
License:Open Source License
/**
 * Generates a single level using a MapReduce job and returns the created job.
 * @param inFiles
 * @param outFile
 * @param plotterClass
 * @param params
 * @return
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static Job plotMapReduce(Path[] inFiles, Path outFile, Class<? extends Plotter> plotterClass,
        OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    Plotter plotter;
    try {
        plotter = plotterClass.newInstance();
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating rasterizer", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating rasterizer", e);
    }

    Job job = new Job(params, "SingleLevelPlot");
    job.setJarByClass(SingleLevelPlot.class);
    job.setJobName("SingleLevelPlot");

    // Set plotter
    Configuration conf = job.getConfiguration();
    Plotter.setPlotter(conf, plotterClass);

    // Set input file MBR
    Rectangle inputMBR = (Rectangle) params.getShape("mbr");
    Rectangle drawRect = (Rectangle) params.getShape("rect");
    if (inputMBR == null)
        inputMBR = drawRect != null ? drawRect : FileMBR.fileMBR(inFiles, params);
    OperationsParams.setShape(conf, InputMBR, inputMBR);
    if (drawRect != null)
        OperationsParams.setShape(conf, SpatialInputFormat3.InputQueryRange, drawRect);

    // Adjust width and height if aspect ratio is to be kept
    int imageWidth = conf.getInt("width", 1000);
    int imageHeight = conf.getInt("height", 1000);
    if (params.getBoolean("keepratio", true)) {
        // Adjust width and height to maintain aspect ratio
        if (inputMBR.getWidth() / inputMBR.getHeight() > (double) imageWidth / imageHeight) {
            // Fix width and change height
            imageHeight = (int) (inputMBR.getHeight() * imageWidth / inputMBR.getWidth());
            // Make divisible by two for compatibility with ffmpeg
            if (imageHeight % 2 == 1)
                imageHeight--;
            conf.setInt("height", imageHeight);
        } else {
            imageWidth = (int) (inputMBR.getWidth() * imageHeight / inputMBR.getHeight());
            conf.setInt("width", imageWidth);
        }
    }

    boolean merge = conf.getBoolean("merge", true);

    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inFiles);
    if (conf.getBoolean("output", true)) {
        if (merge) {
            job.setOutputFormatClass(CanvasOutputFormat.class);
            conf.setClass("mapred.output.committer.class", CanvasOutputFormat.ImageWriterOld.class,
                    org.apache.hadoop.mapred.OutputCommitter.class);
        } else {
            job.setOutputFormatClass(ImageOutputFormat.class);
        }
        CanvasOutputFormat.setOutputPath(job, outFile);
    } else {
        job.setOutputFormatClass(NullOutputFormat.class);
    }

    // Set mapper and reducer based on the partitioning scheme
    String partition = conf.get("partition", "none");
    ClusterStatus clusterStatus = new JobClient(new JobConf()).getClusterStatus();
    if (partition.equalsIgnoreCase("none")) {
        LOG.info("Using no-partition plot");
        job.setMapperClass(NoPartitionPlotMap.class);
        job.setCombinerClass(NoPartitionPlotCombine.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(plotter.getCanvasClass());
        if (merge) {
            int numSplits = new SpatialInputFormat3().getSplits(job).size();
            job.setReducerClass(NoPartitionPlotReduce.class);
            // Set number of reduce tasks according to cluster status
            int maxReduce = Math.max(1, clusterStatus.getMaxReduceTasks() * 7 / 8);
            job.setNumReduceTasks(Math.max(1, Math.min(maxReduce, numSplits / maxReduce)));
        } else {
            job.setNumReduceTasks(0);
        }
    } else {
        LOG.info("Using repartition plot");
        Partitioner partitioner;
        if (partition.equals("pixel")) {
            // Special case for pixel level partitioning as it depends on the
            // visualization parameters
            partitioner = new GridPartitioner(inputMBR, imageWidth, imageHeight);
        } else if (partition.equals("grid")) {
            int numBlocks = 0;
            for (Path in : inFiles) {
                FileSystem fs = in.getFileSystem(params);
                long size = FileUtil.getPathSize(fs, in);
                long blockSize = fs.getDefaultBlockSize(in);
                numBlocks += Math.ceil(size / (double) blockSize);
            }
            int numPartitions = numBlocks * 1000;
            int gridSize = (int) Math.ceil(Math.sqrt(numPartitions));
            partitioner = new GridPartitioner(inputMBR, gridSize, gridSize);
        } else {
            // Use a standard partitioner as created by the indexer
            partitioner = Indexer.createPartitioner(inFiles, outFile, conf, partition);
        }
        Shape shape = params.getShape("shape");
        job.setMapperClass(RepartitionPlotMap.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(shape.getClass());
        job.setReducerClass(RepartitionPlotReduce.class);
        // Set number of reducers according to cluster size
        job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks() * 9 / 10));
        Partitioner.setPartitioner(conf, partitioner);
    }

    // Use multithreading in case the job is running locally
    conf.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    // Start the job
    if (params.getBoolean("background", false)) {
        // Run in background
        job.submit();
    } else {
        job.waitForCompletion(params.getBoolean("verbose", false));
    }
    return job;
}
From source file:edu.umn.cs.sthadoop.trajectory.TrajectoryOverlap.java
License:Open Source License
public static Job rangeQueryMapReduce(Path inFile, Path outFile, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Use the built-in range filter of the input format
    params.set(SpatialInputFormat3.InputQueryRange, params.get("rect"));
    // Use multithreading in case it is running locally
    params.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    Job job = new Job(params, "Traj-KNN-distance");
    job.setJarByClass(RangeQuery.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inFile);
    job.setMapperClass(RangeQueryMap.class);

    if (params.getBoolean("output", true) && outFile != null) {
        job.setOutputFormatClass(TextOutputFormat3.class);
        TextOutputFormat3.setOutputPath(job, outFile);
    } else {
        // Skip writing the output for the sake of debugging
        job.setOutputFormatClass(NullOutputFormat.class);
    }

    // Submit the job
    if (!params.getBoolean("background", false)) {
        job.waitForCompletion(false);
    } else {
        job.submit();
    }
    return job;
}
From source file:ExceptionLicenses.Driver.java
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("usage: [input] [output]");
        System.exit(-1);
    }

    Job job = Job.getInstance(new Configuration());

    // Key and value types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(LiecensesMapper.class);
    job.setReducerClass(LiecensesReducer.class);

    job.setInputFormatClass(LiecenseInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setJarByClass(Driver.class);

    job.submit();
}
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.Summarize.java
License:Open Source License
private boolean runSummary(Path bamPath) throws IOException, ClassNotFoundException, InterruptedException {
    final Configuration conf = getConf();
    Utils.configureSampling(wrkDir, bamPath.getName(), conf);
    final Job job = new Job(conf);

    job.setJarByClass(Summarize.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(SummarizeReducer.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Range.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(RangeCount.class);

    job.setInputFormatClass(SummarizeInputFormat.class);
    job.setOutputFormatClass(SummarizeOutputFormat.class);

    FileInputFormat.setInputPaths(job, bamPath);
    FileOutputFormat.setOutputPath(job, wrkDir);

    job.setPartitionerClass(TotalOrderPartitioner.class);

    System.out.println("summarize :: Sampling...");
    t.start();
    InputSampler.<LongWritable, Range>writePartitionFile(job,
            new InputSampler.RandomSampler<LongWritable, Range>(0.01, 10000, Math.max(100, reduceTasks)));
    System.out.printf("summarize :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());

    for (String lvl : levels) {
        MultipleOutputs.addNamedOutput(job, getSummaryName(lvl, false), SummarizeOutputFormat.class,
                NullWritable.class, Range.class);
        MultipleOutputs.addNamedOutput(job, getSummaryName(lvl, true), SummarizeOutputFormat.class,
                NullWritable.class, Range.class);
    }

    job.submit();

    System.out.println("summarize :: Waiting for job completion...");
    t.start();

    if (!job.waitForCompletion(verbose)) {
        System.err.println("summarize :: Job failed.");
        return false;
    }

    System.out.printf("summarize :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());
    return true;
}
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java
License:Open Source License
static Job sortOne(Configuration conf, Path inputFile, Path outputDir, String commandName, String samplingInfo)
        throws IOException, ClassNotFoundException, InterruptedException {
    conf.set(Utils.WORK_FILENAME_PROPERTY, inputFile.getName());
    Utils.configureSampling(outputDir, inputFile.getName(), conf);
    final Job job = new Job(conf);

    job.setJarByClass(Summarize.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(SortReducer.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SortInputFormat.class);
    job.setOutputFormatClass(SortOutputFormat.class);

    FileInputFormat.setInputPaths(job, inputFile);
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setPartitionerClass(TotalOrderPartitioner.class);

    final Timer t = new Timer();

    System.out.printf("%s :: Sampling%s...\n", commandName, samplingInfo);
    t.start();
    InputSampler.<LongWritable, Text>writePartitionFile(job,
            new InputSampler.SplitSampler<LongWritable, Text>(
                    Math.max(1 << 16, conf.getInt("mapred.reduce.tasks", 1)), 10));
    System.out.printf("%s :: Sampling complete in %d.%03d s.\n", commandName, t.stopS(), t.fms());

    job.submit();
    return job;
}
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.FixMate.java
License:Open Source License
@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("fixmate :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("fixmate :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final SAMFileReader.ValidationStringency stringency = Utils.toStringency(parser.getOptionValue(
            stringencyOpt, SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY.toString()), "fixmate");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    final boolean globalSort = parser.getBoolean(sortOpt);
    if (globalSort)
        Utils.setHeaderMergerSortOrder(conf, SAMFileHeader.SortOrder.queryname);

    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        if (globalSort)
            Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(FixMate.class);
        job.setMapperClass(FixMateMapper.class);
        job.setReducerClass(FixMateReducer.class);

        if (!parser.getBoolean(noCombinerOpt))
            job.setCombinerClass(FixMateReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(AnySAMInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        if (globalSort) {
            job.setPartitionerClass(TotalOrderPartitioner.class);

            System.out.println("fixmate :: Sampling...");
            t.start();

            InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                    new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                            Math.max(100, reduceTasks)));

            System.out.printf("fixmate :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());
        }

        job.submit();

        System.out.println("fixmate :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("fixmate :: Job failed.");
            return 4;
        }

        System.out.printf("fixmate :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());

    } catch (IOException e) {
        System.err.printf("fixmate :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "fixmate");
        } catch (IOException e) {
            System.err.printf("fixmate :: Output merging failed: %s\n", e);
            return 5;
        }
    return 0;
}
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.Sort.java
License:Open Source License
@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("sort :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("sort :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final SAMFileReader.ValidationStringency stringency = Utils.toStringency(parser.getOptionValue(
            stringencyOpt, SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY.toString()), "sort");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    Utils.setHeaderMergerSortOrder(conf, SortOrder.coordinate);
    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(Sort.class);
        job.setMapperClass(Mapper.class);
        job.setReducerClass(SortReducer.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(SortInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        job.setPartitionerClass(TotalOrderPartitioner.class);

        System.out.println("sort :: Sampling...");
        t.start();

        InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                        Math.max(100, reduceTasks)));

        System.out.printf("sort :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());

        job.submit();

        System.out.println("sort :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("sort :: Job failed.");
            return 4;
        }

        System.out.printf("sort :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());

    } catch (IOException e) {
        System.err.printf("sort :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "sort");
        } catch (IOException e) {
            System.err.printf("sort :: Output merging failed: %s\n", e);
            return 5;
        }
    return 0;
}
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.VCFSort.java
License:Open Source License
@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("vcf-sort :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("vcf-sort :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    Path wrkDir = new Path(args.get(0));
    final Path inPath = new Path(args.get(1));

    final Configuration conf = getConf();

    VCFFormat vcfFormat = null;

    final String f = (String) parser.getOptionValue(formatOpt);
    if (f != null) {
        try {
            vcfFormat = VCFFormat.valueOf(f.toUpperCase(Locale.ENGLISH));
        } catch (IllegalArgumentException e) {
            System.err.printf("%s :: invalid format '%s'\n", getCommandName(), f);
            return 3;
        }
    }
    if (vcfFormat == null)
        vcfFormat = outPath == null ? VCFFormat.BCF : VCFFormat.inferFromFilePath(outPath);

    conf.setBoolean(VCFInputFormat.TRUST_EXTS_PROPERTY, !parser.getBoolean(noTrustExtsOpt));
    conf.setBoolean(KeyIgnoringVCFOutputFormat.WRITE_HEADER_PROPERTY, outPath == null);
    conf.set(VCFOutputFormat.OUTPUT_VCF_FORMAT_PROPERTY, vcfFormat.toString());

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inPath : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    conf.set(SortOutputFormat.INPUT_PATH_PROP, inPath.toString());

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(VCFSort.class);
        job.setMapperClass(Mapper.class);
        job.setReducerClass(VCFSortReducer.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(VariantContextWritable.class);

        job.setInputFormatClass(VCFInputFormat.class);
        job.setOutputFormatClass(SortOutputFormat.class);

        FileInputFormat.addInputPath(job, inPath);
        FileOutputFormat.setOutputPath(job, wrkDir);

        job.setPartitionerClass(TotalOrderPartitioner.class);

        System.out.println("vcf-sort :: Sampling...");
        t.start();

        InputSampler.<LongWritable, VariantContextWritable>writePartitionFile(job,
                new InputSampler.RandomSampler<LongWritable, VariantContextWritable>(0.01, 10000,
                        Math.max(100, reduceTasks)));

        System.out.printf("vcf-sort :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());

        job.submit();

        System.out.println("vcf-sort :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("vcf-sort :: Job failed.");
            return 4;
        }

        System.out.printf("vcf-sort :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());

    } catch (IOException e) {
        System.err.printf("vcf-sort :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            System.out.println("vcf-sort :: Merging output...");
            t.start();

            final OutputStream outs = outPath.getFileSystem(conf).create(outPath);

            // First, place the VCF or BCF header.
            final WrapSeekable ins = WrapSeekable.openPath(conf, inPath);
            final VCFHeader header = VCFHeaderReader.readHeaderFrom(ins);
            ins.close();

            final VariantContextWriter writer;

            switch (vcfFormat) {
            case VCF:
                writer = VariantContextWriterFactory.create(new FilterOutputStream(outs) {
                    @Override
                    public void close() throws IOException {
                        this.out.flush();
                    }
                }, null, VariantContextWriterFactory.NO_OPTIONS);
                break;

            case BCF:
                writer = VariantContextWriterFactory
                        .create(new FilterOutputStream(new BlockCompressedOutputStream(outs, null)) {
                            @Override
                            public void close() throws IOException {
                                this.out.flush();
                            }
                        }, null, EnumSet.of(Options.FORCE_BCF));
                break;

            default:
                assert false;
                writer = null;
                break;
            }

            writer.writeHeader(header);
            writer.close();

            // Then, the actual VCF or BCF contents.
            Utils.mergeInto(outs, wrkDir, "", "", conf, "vcf-sort");

            // And if BCF, the BGZF terminator.
            if (vcfFormat == VCFFormat.BCF)
                outs.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);

            outs.close();

            System.out.printf("vcf-sort :: Merging complete in %d.%03d s.\n", t.stopS(), t.fms());

        } catch (IOException e) {
            System.err.printf("vcf-sort :: Output merging failed: %s\n", e);
            return 5;
        }
    return 0;
}
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.MapReduceUtils.java
License:LGPL
/**
 * Wait for the completion of a job.
 * @param job the job to submit
 * @param jobDescription the description of the job
 * @param waitTimeInMillis waiting time between 2 checks of the completion of
 *          jobs
 * @param status step status
 * @param counterGroup group of the counter to log
 * @throws EoulsanException if the job fails or if an exception occurs while
 *           submitting or waiting for the end of the job
 */
public static void submitAndWaitForJob(final Job job, final String jobDescription,
        final int waitTimeInMillis, final TaskStatus status, final String counterGroup)
        throws EoulsanException {

    if (job == null) {
        throw new NullPointerException("The job is null");
    }

    if (jobDescription == null) {
        throw new NullPointerException("The jobDescription is null");
    }

    try {
        // Set the description of the context
        status.setDescription(job.getJobName());

        // Submit the job
        job.submit();

        // Add the Hadoop job to the list of jobs to kill if the workflow fails
        HadoopJobEmergencyStopTask.addHadoopJobEmergencyStopTask(job);

        // Wait for the completion of the job (non-verbose mode)
        job.waitForCompletion(false);

        // Remove the Hadoop job from the list of jobs to kill if the workflow fails
        HadoopJobEmergencyStopTask.removeHadoopJobEmergencyStopTask(job);

        // Check if the job has been successfully executed
        if (!job.isSuccessful()) {
            status.setProgressMessage("FAILED");
            throw new EoulsanException("Fail of the Hadoop job: " + job.getJobFile());
        }

        // Set the counters
        status.setCounters(new HadoopReporter(job.getCounters()), counterGroup);

    } catch (ClassNotFoundException | InterruptedException | IOException e) {
        throw new EoulsanException(e);
    }
}
From source file:gobblin.compaction.mapreduce.MRCompactorJobRunner.java
License:Apache License
private void submitAndWait(Job job) throws ClassNotFoundException, IOException, InterruptedException {
    job.submit();
    MRCompactor.addRunningHadoopJob(this.dataset, job);
    LOG.info(String.format("MR job submitted for dataset %s, input %s, url: %s", this.dataset, getInputPaths(),
            job.getTrackingURL()));
    while (!job.isComplete()) {
        if (this.policy == Policy.ABORT_ASAP) {
            LOG.info(String.format("MR job for dataset %s, input %s killed due to input data incompleteness."
                    + " Will try again later", this.dataset, getInputPaths()));
            job.killJob();
            return;
        }
        Thread.sleep(MR_JOB_CHECK_COMPLETE_INTERVAL_MS);
    }
    if (!job.isSuccessful()) {
        throw new RuntimeException(String.format("MR job failed for topic %s, input %s, url: %s", this.dataset,
                getInputPaths(), job.getTrackingURL()));
    }
}