Usage examples for org.apache.hadoop.mapreduce.Job.submit(), collected from open-source projects. Signature:

public void submit() throws IOException, InterruptedException, ClassNotFoundException
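Before the examples, a minimal self-contained sketch of the pattern they share: configure a Job, call submit() (which returns immediately), then poll or block for completion. This is an illustration, not taken from any of the projects below; it uses the identity Mapper and paths from args, and assumes Hadoop 2.x, where Job.getInstance replaces the deprecated new Job(conf) constructor seen in the examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SubmitSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "submit-sketch"); // preferred over the deprecated new Job(conf)
        job.setJarByClass(SubmitSketch.class);
        job.setMapperClass(Mapper.class);          // identity mapper: passes input records through
        job.setNumReduceTasks(0);                  // map-only job
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.submit();                              // returns immediately; the job runs asynchronously
        while (!job.isComplete()) {                // poll; job.waitForCompletion(true) would block instead
            Thread.sleep(5000);
        }
        System.exit(job.isSuccessful() ? 0 : 1);
    }
}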
From source file:org.qcri.pca.ReconstructionErrJob.java
/**
 * Refer to {@link ReconstructionErrJob} for an explanation of the job.
 *
 * @param conf the configuration
 * @param yPath the path to input matrix Y
 * @param y2xPath the path to in-memory matrix Y2X, where X = Y * Y2X
 * @param yCols the number of columns in Y
 * @param xCols the number of columns in X
 * @param cPath the path to in-memory matrix C, where ReconY = Xc * C'
 * @param zmPath the path to vector Zm, where Zm = Ym * Y2X * C' - Ym
 * @param ymPath the path to the mean vector Ym
 * @param outPath the output path
 * @param ERR_SAMPLE_RATE the sample rate used for error estimation
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void run(Configuration conf, Path yPath, Path y2xPath, int yCols, int xCols, Path cPath, String zmPath,
        String ymPath, Path outPath, final float ERR_SAMPLE_RATE)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXY2X, y2xPath.toString());
    conf.set(RECONSTRUCTIONMATRIX, cPath.toString());
    conf.set(ZMPATH, zmPath);
    conf.set(YMPATH, ymPath);
    conf.setInt(YCOLS, yCols);
    conf.setInt(XCOLS, xCols);
    conf.set(ERRSAMPLERATE, "" + ERR_SAMPLE_RATE);

    FileSystem fs = FileSystem.get(yPath.toUri(), conf);
    yPath = fs.makeQualified(yPath);
    outPath = fs.makeQualified(outPath);

    Job job = new Job(conf);
    FileInputFormat.addInputPath(job, yPath);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setJobName("ReconErrJob-" + yPath.getName());
    job.setJarByClass(ReconstructionErrJob.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(1);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.submit();
    job.waitForCompletion(true);
}
From source file:org.qcri.pca.VarianceJob.java
public void run(Configuration conf, Path yPath, String ymPath, String matrixY2XDir, String xmPath,
        String matrixCDir, Path outPath) throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXY2X, matrixY2XDir);
    conf.set(MATRIXC, matrixCDir);
    conf.set(XMPATH, xmPath);
    conf.set(YMPATH, ymPath);

    FileSystem fs = FileSystem.get(yPath.toUri(), conf);
    yPath = fs.makeQualified(yPath);
    outPath = fs.makeQualified(outPath);

    Job job = new Job(conf);
    FileInputFormat.addInputPath(job, yPath);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setJobName("VarianceJob-" + yPath.getName());
    job.setJarByClass(VarianceJob.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.submit();
    job.waitForCompletion(true);
}
From source file:org.sample.hadoop.WordCountV2.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJobName("wordcount");
    job.setJarByClass(this.getClass());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setCombinerClass(IntSumReducer.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), job.getConfiguration());
            job.getConfiguration().setBoolean("wordcount.skip.patterns", true);
        } else {
            other_args.add(args[i]);
        }
    }

    FileInputFormat.addInputPath(job, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(job, new Path(other_args.get(1)));

    job.submit();
    return 0;
}
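The example above uses DistributedCache, which is deprecated in Hadoop 2.x. A minimal sketch of the same skip-file wiring through the Job API instead; the class and method names here are hypothetical placeholders, not part of the original project:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public class CacheFileSketch {
    // Hypothetical helper: same effect as the DistributedCache call above,
    // using Job.addCacheFile from the Hadoop 2.x API.
    public static Job configureSkipPatterns(Configuration conf, String skipFile) throws Exception {
        Job job = Job.getInstance(conf, "wordcount");
        job.addCacheFile(new Path(skipFile).toUri()); // replaces DistributedCache.addCacheFile(...)
        job.getConfiguration().setBoolean("wordcount.skip.patterns", true);
        return job;
    }
}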
From source file:org.seqdoop.hadoop_bam.cli.plugins.FixMate.java
License:Open Source License
@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("fixmate :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("fixmate :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final ValidationStringency stringency = Utils.toStringency(
            parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()),
            "fixmate");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    final boolean globalSort = parser.getBoolean(sortOpt);
    if (globalSort)
        Utils.setHeaderMergerSortOrder(conf, SAMFileHeader.SortOrder.queryname);

    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        if (globalSort)
            Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(FixMate.class);
        job.setMapperClass(FixMateMapper.class);
        job.setReducerClass(FixMateReducer.class);

        if (!parser.getBoolean(noCombinerOpt))
            job.setCombinerClass(FixMateReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(AnySAMInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        if (globalSort) {
            job.setPartitionerClass(TotalOrderPartitioner.class);

            System.out.println("fixmate :: Sampling...");
            t.start();

            InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                    new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                            Math.max(100, reduceTasks)));

            System.out.printf("fixmate :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());
        }

        job.submit();

        System.out.println("fixmate :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("fixmate :: Job failed.");
            return 4;
        }

        System.out.printf("fixmate :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());
    } catch (IOException e) {
        System.err.printf("fixmate :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "fixmate");
        } catch (IOException e) {
            System.err.printf("fixmate :: Output merging failed: %s\n", e);
            return 5;
        }

    return 0;
}
From source file:org.seqdoop.hadoop_bam.cli.plugins.Sort.java
License:Open Source License
@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("sort :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("sort :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final ValidationStringency stringency = Utils.toStringency(
            parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()), "sort");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    Utils.setHeaderMergerSortOrder(conf, SortOrder.coordinate);
    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(Sort.class);
        job.setMapperClass(Mapper.class);
        job.setReducerClass(SortReducer.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(SortInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        job.setPartitionerClass(TotalOrderPartitioner.class);

        System.out.println("sort :: Sampling...");
        t.start();

        InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                        Math.max(100, reduceTasks)));

        System.out.printf("sort :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());

        job.submit();

        System.out.println("sort :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("sort :: Job failed.");
            return 4;
        }

        System.out.printf("sort :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());
    } catch (IOException e) {
        System.err.printf("sort :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "sort");
        } catch (IOException e) {
            System.err.printf("sort :: Output merging failed: %s\n", e);
            return 5;
        }

    return 0;
}
From source file:org.seqdoop.hadoop_bam.examples.ReadFilter.java
License:Open Source License
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();

    // The path to the header for the output file; make sure this is correctly specified!
    conf.set(MyOutputFormat.HEADER_FROM_FILE, "/user/ubuntu/header.bam");

    final Job job = new Job(conf);

    job.setJarByClass(ReadFilter.class);
    job.setMapperClass(ReadFilterMapper.class);
    job.setReducerClass(ReadFilterReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SAMRecordWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SAMRecordWritable.class);

    job.setInputFormatClass(AnySAMInputFormat.class);
    job.setOutputFormatClass(ReadFilter.MyOutputFormat.class);

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(job, new Path(args[0]));
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.submit();

    if (!job.waitForCompletion(true)) {
        System.err.println("ReadFilter :: Job failed.");
        return 1;
    }

    return 0;
}
From source file:org.seqdoop.hadoop_bam.examples.TestBAM.java
License:Open Source License
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    conf.set(MyOutputFormat.HEADER_FROM_FILE, args[0]);

    final Job job = new Job(conf);

    job.setJarByClass(TestBAM.class);
    job.setMapperClass(TestBAMMapper.class);
    job.setReducerClass(TestBAMReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SAMRecordWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SAMRecordWritable.class);

    job.setInputFormatClass(AnySAMInputFormat.class);
    job.setOutputFormatClass(TestBAM.MyOutputFormat.class);

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0]));
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.submit();

    if (!job.waitForCompletion(true)) {
        System.err.println("TestBAM :: Job failed.");
        return 1;
    }

    return 0;
}
From source file:org.seqdoop.hadoop_bam.examples.TestVCF.java
License:Open Source License
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    conf.set(VCFOutputFormat.OUTPUT_VCF_FORMAT_PROPERTY, "VCF");
    conf.set(MyVCFOutputFormat.INPUT_PATH_PROP, args[0]);

    final Job job = new Job(conf);

    job.setJarByClass(TestVCF.class);
    job.setMapperClass(TestVCFMapper.class);
    job.setReducerClass(TestVCFReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VariantContextWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VariantContextWritable.class);

    job.setInputFormatClass(VCFInputFormat.class);
    job.setOutputFormatClass(MyVCFOutputFormat.class);

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0]));
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.submit();

    if (!job.waitForCompletion(true)) {
        System.err.println("TestVCF :: Job failed.");
        return 1;
    }

    return 0;
}
From source file:org.springframework.data.hadoop.mapreduce.JobExecutor.java
License:Apache License
protected Collection<Job> startJobs(final JobListener listener) {
    final Collection<Job> jbs = findJobs();
    final List<Job> started = new ArrayList<Job>();

    taskExecutor.execute(new Runnable() {
        @Override
        public void run() {
            Object listenerInit = null;
            if (listener != null) {
                listenerInit = listener.beforeAction();
            }

            try {
                for (final Job job : jbs) {
                    boolean success = false;
                    try {
                        // job is already running - ignore it
                        if (JobUtils.getStatus(job).isStarted()) {
                            log.info("Job [" + job.getJobName() + "] already started; skipping it...");
                            break;
                        }

                        log.info("Starting job [" + job.getJobName() + "]");
                        synchronized (started) {
                            started.add(job);
                        }

                        if (!waitForCompletion) {
                            success = true;
                            job.submit();
                        } else {
                            success = job.waitForCompletion(verbose);
                            log.info("Completed job [" + job.getJobName() + "]");
                            if (listener != null) {
                                listener.jobFinished(job);
                            }
                        }
                    } catch (InterruptedException ex) {
                        log.warn("Job [" + job.getJobName() + "] killed");
                        throw new IllegalStateException(ex);
                    } catch (Exception ex) {
                        log.warn("Cannot start job [" + job.getJobName() + "]", ex);
                        throw new IllegalStateException(ex);
                    }

                    if (!success) {
                        if (!shuttingDown) {
                            JobStatus status = JobUtils.getStatus(job);
                            if (JobStatus.KILLED == status) {
                                throw new IllegalStateException("Job [" + job.getJobName() + "] killed");
                            } else {
                                throw new IllegalStateException(
                                        "Job [" + job.getJobName() + "] failed to start; status=" + status);
                            }
                        } else {
                            log.info("Job [" + job.getJobName() + "] killed by shutdown");
                        }
                    }
                }
            } finally {
                if (listener != null) {
                    listener.afterAction(listenerInit);
                }
            }
        }
    });

    return started;
}
From source file:parquet.hadoop2.TestInputOutputFormat.java
License:Apache License
@Test
public void testReadWrite() throws IOException, ClassNotFoundException, InterruptedException {
    final Configuration conf = new Configuration();
    final Path inputPath = new Path("src/test/java/parquet/hadoop2/TestInputOutputFormat.java");
    final Path parquetPath = new Path("target/test/hadoop2/example/TestInputOutputFormat/parquet");
    final Path outputPath = new Path("target/test/hadoop2/example/TestInputOutputFormat/out");
    final FileSystem fileSystem = parquetPath.getFileSystem(conf);
    fileSystem.delete(parquetPath, true);
    fileSystem.delete(outputPath, true);
    {
        final Job job = new Job(conf, "write");
        TextInputFormat.addInputPath(job, inputPath);
        job.setInputFormatClass(TextInputFormat.class);
        job.setNumReduceTasks(0);
        ExampleOutputFormat.setCompression(job, CompressionCodecName.GZIP);
        ExampleOutputFormat.setOutputPath(job, parquetPath);
        job.setOutputFormatClass(ExampleOutputFormat.class);
        job.setMapperClass(TestInputOutputFormat.MyMapper.class);
        ExampleOutputFormat.setSchema(job, MessageTypeParser.parseMessageType(
                "message example {\n" + "required int32 line;\n" + "required binary content;\n" + "}"));
        job.submit();
        waitForJob(job);
    }
    {
        final Job job = new Job(conf, "read");
        job.setInputFormatClass(ExampleInputFormat.class);
        ExampleInputFormat.setInputPaths(job, parquetPath);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, outputPath);
        job.setMapperClass(TestInputOutputFormat.MyMapper2.class);
        job.setNumReduceTasks(0);
        job.submit();
        waitForJob(job);
    }
    final BufferedReader in = new BufferedReader(new FileReader(new File(inputPath.toString())));
    final BufferedReader out = new BufferedReader(
            new FileReader(new File(outputPath.toString(), "part-m-00000")));
    String lineIn;
    String lineOut = null;
    int lineNumber = 0;
    while ((lineIn = in.readLine()) != null && (lineOut = out.readLine()) != null) {
        ++lineNumber;
        lineOut = lineOut.substring(lineOut.indexOf("\t") + 1);
        assertEquals("line " + lineNumber, lineIn, lineOut);
    }
    assertNull("line " + lineNumber, lineIn);
    assertNull("line " + lineNumber, out.readLine());
    assertTrue(lineNumber > 0);
    in.close();
    out.close();
}
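The test above relies on a waitForJob helper whose body is not shown. A plausible minimal sketch of such a polling loop, built only on the public Job API; this is an illustration, not the project's actual helper:

import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;

public class WaitForJobSketch {
    // Hypothetical helper: poll a submitted job until it finishes,
    // since Job.submit() itself does not block.
    static void waitForJob(Job job) throws IOException, InterruptedException {
        while (!job.isComplete()) {
            System.out.println("waiting for job " + job.getJobName());
            Thread.sleep(100);
        }
        System.out.println("status: " + (job.isSuccessful() ? "SUCCESS" : "FAILURE"));
        if (!job.isSuccessful()) {
            throw new RuntimeException("job failed: " + job.getJobName());
        }
    }
}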