List of usage examples for org.apache.hadoop.mapreduce Job submit
public void submit() throws IOException, InterruptedException, ClassNotFoundException
From source file:com.rim.logdriver.util.Search.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }// ww w.j ava2 s .c o m FileSystem fs = FileSystem.get(conf); // The command line options String searchString = null; List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 3) { System.out.println("usage: [genericOptions] searchString input [input ...] output"); System.exit(1); } // Get the files we need from the command line. searchString = args[0]; for (int i = 1; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(Search.class); jobConf.setIfUnset("mapred.job.name", "Search Files"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. { String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } jobConf.set("logdriver.search.string", searchString); job.setInputFormatClass(BoomInputFormat.class); job.setMapperClass(SearchMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); // And set the output as usual job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { BoomInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.sequenceiq.yarntest.mr.QuasiMonteCarlo.java
License:Apache License
/** * Run a map/reduce job for estimating Pi. * * @return the estimated value of Pi//from ww w . ja v a 2s . com */ public static JobID submitPiEstimationMRApp(String jobName, int numMaps, long numPoints, Path tmpDir, Configuration conf) throws IOException, ClassNotFoundException, InterruptedException { Job job = new Job(conf); //setup job conf job.setJobName(jobName); job.setJarByClass(QuasiMonteCarlo.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputKeyClass(BooleanWritable.class); job.setOutputValueClass(LongWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(QmcMapper.class); job.setReducerClass(QmcReducer.class); job.setNumReduceTasks(1); // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. job.setSpeculativeExecution(false); //setup input/output directories final Path inDir = new Path(tmpDir, "in"); final Path outDir = new Path(tmpDir, "out"); FileInputFormat.setInputPaths(job, inDir); FileOutputFormat.setOutputPath(job, outDir); final FileSystem fs = FileSystem.get(conf); if (fs.exists(tmpDir)) { fs.delete(tmpDir, true); // throw new IOException("Tmp directory " + fs.makeQualified(tmpDir) // + " already exists. Please remove it first."); } if (!fs.mkdirs(inDir)) { throw new IOException("Cannot create input directory " + inDir); } // try { //generate an input file for each map task for (int i = 0; i < numMaps; ++i) { final Path file = new Path(inDir, "part" + i); final LongWritable offset = new LongWritable(i * numPoints); final LongWritable size = new LongWritable(numPoints); final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class, LongWritable.class, CompressionType.NONE); try { writer.append(offset, size); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i); } //start a map/reduce job System.out.println("Starting Job"); final long startTime = System.currentTimeMillis(); job.submit(); // final double duration = (System.currentTimeMillis() - startTime)/1000.0; // System.out.println("Job Finished in " + duration + " seconds"); return job.getJobID(); // } finally { // fs.delete(tmpDir, true); // } }
From source file:com.streamsets.pipeline.stage.destination.mapreduce.MapReduceExecutor.java
License:Apache License
private Job createAndSubmitJob(final Configuration configuration) throws IOException, InterruptedException { return mapReduceConfig.getUGI().doAs((PrivilegedExceptionAction<Job>) () -> { // Create new mapreduce job object Callable<Job> jobCreator = ReflectionUtils.newInstance(jobConfig.getJobCreator(), configuration); Job job = jobCreator.call(); // In trace mode, dump all the configuration that we're using for the job if (LOG.isTraceEnabled()) { LOG.trace("Using the following configuration object for mapreduce job."); for (Map.Entry<String, String> entry : configuration) { LOG.trace(" Config: {}={}", entry.getKey(), entry.getValue()); }// w w w .j a v a2s . co m } // Submit it for processing. Blocking mode is only for testing. job.submit(); if (waitForCompletition) { job.waitForCompletion(true); } return job; }); }
From source file:com.twitter.algebra.matrix.format.Sequence2MatrixFormatJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath) throws IOException, InterruptedException, ClassNotFoundException { FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "seq2mtx"); @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(Sequence2MatrixFormatJob.class); job.setJobName(Sequence2MatrixFormatJob.class.getSimpleName()); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); job.setNumReduceTasks(0);/*from w w w. j av a 2 s. c o m*/ job.setOutputFormatClass(MatrixOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed!"); }
From source file:com.twitter.algebra.matrix.multiply.ABInnerHDFSBroadcastOfB.java
License:Apache License
/** * Perform A x B, where A and B refer to the paths that contain matrices in * {@link SequenceFileInputFormat} Refer to {@link ABInnerHDFSBroadcastOfB} * for further details.//www. j av a2s . c o m * * @param conf the initial configuration * @param matrixInputPath path to matrix A * @param inMemMatrixDir path to matrix B (must be small enough to fit into * memory) * @param matrixOutputPath path to which AxB will be written * @param inMemMatrixNumRows B rows * @param inMemMatrixNumCols B cols * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void run(Configuration conf, Path matrixInputPath, String inMemMatrixDir, Path matrixOutputPath, int inMemMatrixNumRows, int inMemMatrixNumCols) throws IOException, InterruptedException, ClassNotFoundException { conf = new Configuration(conf); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "axbinner"); conf.set(MATRIXINMEMORY, inMemMatrixDir); conf.setInt(MATRIXINMEMORYROWS, inMemMatrixNumRows); conf.setInt(MATRIXINMEMORYCOLS, inMemMatrixNumCols); @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(ABInnerHDFSBroadcastOfB.class); job.setJobName(ABInnerHDFSBroadcastOfB.class.getSimpleName()); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); job.setMapperClass(MyMapper.class); job.setNumReduceTasks(0); job.setOutputFormatClass(MatrixOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); // since we do not use reducer, to get total order, the map output files has // to be renamed after this function returns: {@link // AlgebraCommon#fixPartitioningProblem} job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed!"); }
From source file:com.twitter.algebra.matrix.multiply.ABOuterHDFSBroadcastOfA.java
License:Apache License
/** * Perform A x B, where A and B refer to the paths that contain matrices in * {@link SequenceFileInputFormat} Refer to {@link ABOuterHDFSBroadcastOfA} * for further details./*from w w w.j a v a 2s . c om*/ * * @param conf * the initial configuration * @param matrixInputPath * path to matrix A * @param inMemMatrixDir * path to matrix B (must be small enough to fit into memory) * @param matrixOutputPath * path to which AxB will be written * @param inMemMatrixNumRows * B rows * @param inMemMatrixNumCols * B cols * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void run(Configuration conf, String inMemMatrixDir, Path matrixInputPath, Path matrixOutputPath, int inMemMatrixNumRows, int inMemMatrixNumCols) throws IOException, InterruptedException, ClassNotFoundException { conf.set(MATRIXINMEMORY, inMemMatrixDir); conf.setInt(MATRIXINMEMORYROWS, inMemMatrixNumRows); conf.setInt(MATRIXINMEMORYCOLS, inMemMatrixNumCols); @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(ABOuterHDFSBroadcastOfA.class); job.setJobName(ABOuterHDFSBroadcastOfA.class.getSimpleName()); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); // ensures total order (when used with {@link MatrixOutputFormat}), RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, inMemMatrixNumRows); job.setCombinerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class); job.setReducerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class); job.setOutputFormatClass(MatrixOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed!"); }
From source file:com.twitter.algebra.matrix.multiply.AtBOuterStaticMapsideJoinJob.java
License:Apache License
public void run(Configuration conf, Path atPath, Path bPath, Path outPath, int outCardinality) throws IOException, InterruptedException, ClassNotFoundException { conf.setInt(OUT_CARD, outCardinality); @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJobName(AtBOuterStaticMapsideJoinJob.class.getSimpleName()); job.setJarByClass(AtBOuterStaticMapsideJoinJob.class); FileSystem fs = FileSystem.get(atPath.toUri(), conf); atPath = fs.makeQualified(atPath);/* www .java2s . c o m*/ bPath = fs.makeQualified(bPath); job.setInputFormatClass(CompositeInputFormat.class); //mapside join expression job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR, CompositeInputFormat.compose("inner", SequenceFileInputFormat.class, atPath, bPath)); job.setOutputFormatClass(MatrixOutputFormat.class); outPath = fs.makeQualified(outPath); FileOutputFormat.setOutputPath(job, outPath); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); job.setCombinerClass(MyReducer.class); int numReducers = conf.getInt("algebra.reduceslots.multiply", 10); job.setNumReduceTasks(numReducers); job.setReducerClass(MyReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed"); }
From source file:com.twitter.algebra.matrix.multiply.AtB_DMJ.java
License:Apache License
/** * Perform A x B, where At and B refer to the paths that contain matrices in * {@link SequenceFileInputFormat}. One of At and B must also conform with * {@link MapDir} format. Refer to {@link AtB_DMJ} for further details. * /*from w w w.j a va2s . c o m*/ * @param conf the initial configuration * @param mapDirPath path to the matrix in {@link MapDir} format * @param matrixInputPaths the list of paths to matrix input partitions over * which we iterate * @param matrixOutputPath path to which AxB will be written * @param atCols number of columns of At (rows of A) * @param bCols * @param colsPerPartition cols per partition of the input matrix (whether At or B) * @param aIsMapDir is A chosen to be loaded as MapDir * @param useInMemCombiner * @param numberOfJobs the hint for the desired number of parallel jobs * @return the running job * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public Job run(Configuration conf, Path mapDirPath, Path matrixInputPaths, Path matrixOutputPath, int atCols, int bCols, int colsPerPartition, boolean aIsMapDir, boolean useInMemCombiner) throws IOException, InterruptedException, ClassNotFoundException { conf = new Configuration(conf); conf.set(MATRIXINMEMORY, mapDirPath.toString()); conf.setBoolean(AISMAPDIR, aIsMapDir); conf.setBoolean(USEINMEMCOMBINER, useInMemCombiner); conf.setInt(RESULTROWS, atCols); conf.setInt(RESULTCOLS, bCols); conf.setInt(PARTITIONCOLS, colsPerPartition); FileSystem fs = FileSystem.get(matrixOutputPath.toUri(), conf); NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPaths, "dmj"); if (useInMemCombiner) { Configuration newConf = new Configuration(conf); newConf.set("mapreduce.task.io.sort.mb", "1"); conf = newConf; } @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(AtB_DMJ.class); job.setJobName(AtB_DMJ.class.getSimpleName()); matrixOutputPath = fs.makeQualified(matrixOutputPath); matrixInputPaths = fs.makeQualified(matrixInputPaths); MultipleInputs.addInputPath(job, matrixInputPaths, SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); if (!useInMemCombiner) job.setCombinerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class); int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "dmj"); job.setNumReduceTasks(numReducers); // ensures total order (when used with {@link MatrixOutputFormat}), RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, atCols); job.setReducerClass(EpsilonReducer.class); job.setOutputFormatClass(MatrixOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.submit(); return job; }
From source file:com.twitter.algebra.matrix.multiply.PartitionerJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int aRows, int partitions) throws IOException, InterruptedException, ClassNotFoundException { @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(PartitionerJob.class); job.setJobName(PartitionerJob.class.getSimpleName() + "-" + matrixOutputPath.getName()); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); job.setNumReduceTasks(partitions);// w w w. j av a 2 s.co m job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.setMapperClass(IdMapper.class); job.setReducerClass(IdReducer.class); RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, aRows); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed!"); }
From source file:com.twitter.algebra.matrix.text.Matrix2TextJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath) throws IOException, InterruptedException, ClassNotFoundException { @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(Matrix2TextJob.class); job.setJobName(Matrix2TextJob.class.getSimpleName()); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); // FileInputFormat.addInputPath(job, matrixInputPath); MultipleInputs.addInputPath(job, matrixInputPath, SequenceFileInputFormat.class); // job.setInputFormatClass(SequenceFileInputFormat.class); TextOutputFormat.setOutputPath(job, matrixOutputPath); job.setNumReduceTasks(0);//www.j a v a 2 s . c om job.setOutputFormatClass(TextOutputFormat.class); // job.setOutputKeyClass(IntWritable.class); // job.setOutputValueClass(org.apache.hadoop.io.Text); job.setMapperClass(IdMapper.class); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed!"); }