List of usage examples for org.apache.hadoop.mapreduce.Job.isSuccessful()
public boolean isSuccessful() throws IOException
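For orientation, a minimal sketch of the typical pattern, assuming a hypothetical driver class and command-line input/output paths (none of the names below come from the examples that follow): block on waitForCompletion(), then use isSuccessful() to choose an exit code.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class IsSuccessfulExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "example");
        job.setJarByClass(IsSuccessfulExample.class);
        // Mapper/Reducer and key/value classes would be configured here for a real job.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion() blocks until the job finishes;
        // isSuccessful() then reports whether it completed without failure.
        job.waitForCompletion(true);
        System.exit(job.isSuccessful() ? 0 : 1);
    }
}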
From source file:com.mozilla.socorro.hadoop.CrashCountToHbase.java
License:LGPL
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        return printUsage();
    }
    int rc = -1;
    Job job = initJob(args);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        rc = 0;
    }
    return rc;
}
From source file:com.mozilla.socorro.hadoop.RawDumpSize.java
License:LGPL
public int run(String[] args) throws Exception {
    if (args.length != 1) {
        return printUsage();
    }
    int rc = -1;
    Job job = initJob(args);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        rc = 0;
        FileSystem hdfs = null;
        DescriptiveStatistics rawStats = new DescriptiveStatistics();
        long rawTotal = 0L;
        DescriptiveStatistics processedStats = new DescriptiveStatistics();
        long processedTotal = 0L;
        try {
            hdfs = FileSystem.get(job.getConfiguration());
            Pattern tabPattern = Pattern.compile("\t");
            for (FileStatus status : hdfs.listStatus(FileOutputFormat.getOutputPath(job))) {
                if (!status.isDir()) {
                    BufferedReader reader = null;
                    try {
                        reader = new BufferedReader(new InputStreamReader(hdfs.open(status.getPath())));
                        String line = null;
                        while ((line = reader.readLine()) != null) {
                            String[] splits = tabPattern.split(line);
                            int byteSize = Integer.parseInt(splits[2]);
                            if ("raw".equals(splits[1])) {
                                rawStats.addValue(byteSize);
                                rawTotal += byteSize;
                            } else if ("processed".equals(splits[1])) {
                                processedStats.addValue(byteSize);
                                processedTotal += byteSize;
                            }
                        }
                    } finally {
                        if (reader != null) {
                            reader.close();
                        }
                    }
                }
            }
        } finally {
            if (hdfs != null) {
                hdfs.close();
            }
        }
        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " raw_data:dump =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f",
                rawStats.getMin(), rawStats.getMax(), rawStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                rawStats.getPercentile(25.0d), rawStats.getPercentile(50.0d), rawStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + rawTotal);
        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " processed_data:json =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f",
                processedStats.getMin(), processedStats.getMax(), processedStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                processedStats.getPercentile(25.0d), processedStats.getPercentile(50.0d), processedStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + processedTotal);
    }
    return rc;
}
From source file:com.mycompany.maprpractice.runnerClass.WordCount.java
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }
    Job job = new org.apache.hadoop.mapreduce.Job();
    job.setJarByClass(WordCount.class);
    job.setJobName("WordCounter");

    String inputPath = "C:\\Users\\priyamdixit\\Desktop\\TestData\\wordCount.txt";
    String outputPath = "C:\\Users\\priyamdixit\\Desktop\\TestData";
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    // FileInputFormat.addInputPath(job, new Path(args[0]));
    // FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);

    int returnValue = job.waitForCompletion(true) ? 0 : 1;
    System.out.println("job.isSuccessful " + job.isSuccessful());
    return returnValue;
}
From source file:com.scaleoutsoftware.soss.hserver.Test_WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    writeFile();
    DataAccessor.clearAllObjects();
    Configuration conf = new Configuration();
    conf.setInt("mapred.hserver.setting.reducer.usememorymappedfiles", 0);
    String in = args.length == 2 ? args[0] : "random.txt";
    String out = args.length == 2 ? args[1] : "c:\\development\\mapred_output\\dir" + System.currentTimeMillis();
    HServerJob job;
    job = new HServerJob(conf, "overrides", true);
    Job job1 = job;
    // check overrides
    System.out.println("Check to ensure casting is correct..." + job.isSuccessful() + job1.isSuccessful());

    // With phase1, run several times to test recording and replaying
    long time = System.currentTimeMillis(); // check runtime
    for (int i = 0; i < 3; i++) {
        job = new HServerJob(conf, "Job #" + i, true);
        // Need to manually edit this per deployment
        job.setJarPath("/path/to/your/classes.jar");
        job.setJarByClass(Test_WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(8);
        FileInputFormat.addInputPath(job, new Path(in));
        FileOutputFormat.setOutputPath(job, new Path(out + System.currentTimeMillis()));
        job.waitForCompletion(true);
    }
    System.out.println("Job done in " + (System.currentTimeMillis() - time) / 10);

    // Without combiner
    job = new HServerJob(conf);
    job.setJarPath("/path/to/your/classes.jar");
    time = System.currentTimeMillis();
    job.setJarByClass(Test_WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(8);
    FileInputFormat.addInputPath(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out + System.currentTimeMillis()));
    job.waitForCompletion(true);
    System.out.println("Job done in " + (System.currentTimeMillis() - time));
}
From source file:com.splout.db.hadoop.TablespaceGenerator.java
License:Apache License
protected void executeViewGeneration(TupleMRBuilder builder) throws IOException, InterruptedException,
        ClassNotFoundException, TablespaceGeneratorException, TupleMRException {
    try {
        Job generationJob = builder.createJob();
        long start = System.currentTimeMillis();
        generationJob.waitForCompletion(true);
        if (!generationJob.isSuccessful()) {
            throw new TablespaceGeneratorException("Error executing generation Job");
        }
        long end = System.currentTimeMillis();
        Log.info("Tablespace store generated in " + (end - start) + " ms.");
    } finally {
        builder.cleanUpInstanceFiles();
    }
}
From source file:com.twitter.elephanttwin.lucene.indexing.AbstractLuceneIndexingJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    LOG = Logger.getLogger(this.getClass());
    params = newIndexConfig();
    LOG.info("Starting up indexer...");
    LOG.info(" - input: " + Joiner.on(" ").join(IndexConfig.input.get()));
    LOG.info(" - index: " + IndexConfig.index);
    LOG.info(" - number of shards: " + IndexConfig.numPartitions.get());

    Configuration conf = getConf();
    conf.set(AbstractLuceneIndexingReducer.HDFS_INDEX_LOCATION, IndexConfig.index.get());
    conf.set(AbstractLuceneIndexingReducer.ANALYZER, IndexConfig.analyzer.get());
    conf.set(AbstractLuceneIndexingReducer.SIMILARITY, IndexConfig.similarity.get());
    conf.setInt(AbstractSamplingIndexingMapper.SAMPLE_PERCENTAGE, IndexConfig.samplePercentage.get());
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    Job job = new Job(conf, getJobName(params));
    // Job's constructor copies conf, we need a reference to the one job is actually using
    conf = job.getConfiguration();
    job.setJarByClass(this.getClass());
    job.setNumReduceTasks(IndexConfig.numPartitions.get());

    for (String s : IndexConfig.input.get()) {
        Path spath = new Path(s);
        FileSystem fs = spath.getFileSystem(getConf());
        List<FileStatus> stats = Lists.newArrayList();
        addInputPathRecursively(stats, fs, spath, HdfsUtils.HIDDEN_FILE_FILTER);
        for (FileStatus foundStat : stats) {
            FileInputFormat.addInputPath(job, foundStat.getPath());
        }
    }
    FileOutputFormat.setOutputPath(job, new Path(IndexConfig.index.get()));
    setupJob(job);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(IndexConfig.index.get());
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    LOG.info("Job " + getJobName(params) + " started.");
    // TODO Jimmy has a parameter that controls whether we wait in Thud but not in ES.
    // when would we not want to wait?
    job.waitForCompletion(true);
    LOG.info("Job " + getJobName(params) + " Finished in "
            + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    if (job.isSuccessful()) {
        writeIndexDescriptors(getIndexDescriptor());
    }
    return job.isSuccessful() ? 0 : 1;
}
From source file:com.xiaomi.linden.hadoop.indexing.job.LindenJob.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    String dir = conf.get(LindenJobConfig.INPUT_DIR, null);
    logger.info("input dir:" + dir);
    Path inputPath = new Path(StringUtils.unEscapeString(dir));
    Path outputPath = new Path(conf.get(LindenJobConfig.OUTPUT_DIR));
    String indexPath = conf.get(LindenJobConfig.INDEX_PATH);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    if (fs.exists(new Path(indexPath))) {
        fs.delete(new Path(indexPath), true);
    }
    int numShards = conf.getInt(LindenJobConfig.NUM_SHARDS, 1);
    Shard[] shards = createShards(indexPath, numShards);
    Shard.setIndexShards(conf, shards);
    // empty trash
    (new Trash(conf)).expunge();

    Job job = Job.getInstance(conf, "linden-hadoop-indexing");
    job.setJarByClass(LindenJob.class);
    job.setMapperClass(LindenMapper.class);
    job.setCombinerClass(LindenCombiner.class);
    job.setReducerClass(LindenReducer.class);
    job.setMapOutputKeyClass(Shard.class);
    job.setMapOutputValueClass(IntermediateForm.class);
    job.setOutputKeyClass(Shard.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(IndexUpdateOutputFormat.class);
    job.setReduceSpeculativeExecution(false);
    job.setNumReduceTasks(numShards);

    String lindenSchemaFile = conf.get(LindenJobConfig.SCHEMA_FILE_URL);
    if (lindenSchemaFile == null) {
        throw new IOException("no schema file is found");
    }
    logger.info("Adding schema file: " + lindenSchemaFile);
    job.addCacheFile(new URI(lindenSchemaFile + "#lindenSchema"));
    String lindenPropertiesFile = conf.get(LindenJobConfig.LINDEN_PROPERTIES_FILE_URL);
    if (lindenPropertiesFile == null) {
        throw new IOException("no linden properties file is found");
    }
    logger.info("Adding linden properties file: " + lindenPropertiesFile);
    job.addCacheFile(new URI(lindenPropertiesFile + "#lindenProperties"));

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    Path[] inputs = FileInputFormat.getInputPaths(job);
    StringBuilder buffer = new StringBuilder(inputs[0].toString());
    for (int i = 1; i < inputs.length; i++) {
        buffer.append(",");
        buffer.append(inputs[i].toString());
    }
    logger.info("mapreduce.input.dir = " + buffer.toString());
    logger.info("mapreduce.output.dir = " + FileOutputFormat.getOutputPath(job).toString());
    logger.info("mapreduce.job.num.reduce.tasks = " + job.getNumReduceTasks());
    logger.info(shards.length + " shards = " + conf.get(LindenJobConfig.INDEX_SHARDS));
    logger.info("mapreduce.input.format.class = " + job.getInputFormatClass());
    logger.info("mapreduce.output.format.class = " + job.getOutputFormatClass());
    logger.info("mapreduce.cluster.temp.dir = " + conf.get(MRJobConfig.TEMP_DIR));

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed");
    }
    return 0;
}
From source file:edu.gslis.ts.hadoop.ThriftBulkLoader.java
License:Apache License
public int run(String[] args) throws Exception {
    String tableName = args[0];
    String inputPath = args[1];
    String outputPath = args[2];
    Path topicsFile = new Path(args[3]);
    Path vocabFile = new Path(args[4]);
    Path dateBinFile = new Path(args[5]);

    Configuration config = getConf();
    config.set("hbase.table.name", tableName);
    HBaseConfiguration.addHbaseResources(config);

    Job job = Job.getInstance(config);
    job.setJarByClass(ThriftBulkLoader.class);
    job.setJobName("Bulk Loading HBase Table::" + tableName);
    job.setInputFormatClass(ThriftFileInputFormat.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapperClass(ThriftFilterMapper.class);

    Path output = new Path(outputPath);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileInputFormat.setInputDirRecursive(job, true);
    FileOutputFormat.setOutputPath(job, output);
    job.setMapOutputValueClass(Put.class);

    job.addCacheFile(topicsFile.toUri());
    job.addCacheFile(vocabFile.toUri());
    job.addCacheFile(dateBinFile.toUri());

    job.getConfiguration().setBoolean("mapreduce.map.output.compress", true);
    job.getConfiguration().setClass("mapred.map.output.compression.codec",
            org.apache.hadoop.io.compress.SnappyCodec.class,
            org.apache.hadoop.io.compress.CompressionCodec.class);
    job.getConfiguration().set("hfile.compression", Compression.Algorithm.SNAPPY.getName());

    //RegionLocator regionLocator = conn.getRegionLocator(tableName);
    //HFileOutputFormat2.configureIncrementalLoad(job, new HTable(config,tableName));
    Connection con = ConnectionFactory.createConnection(config);
    TableName htableName = TableName.valueOf(tableName);
    HFileOutputFormat2.configureIncrementalLoad(job, con.getTable(htableName), con.getRegionLocator(htableName));

    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        // Couldn't find a better way to do this. The LoadIncrementalHFiles
        // seems to want 777 permissions on the output directory.
        try {
            Runtime rt = Runtime.getRuntime();
            rt.exec("hadoop fs -chmod -R 777 " + output);
        } catch (Exception e) {
            e.printStackTrace();
        }
        /*
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
        HTable htable = new HTable(config, tableName);
        loader.doBulkLoad(new Path(outputPath), htable);
        */
    } else {
        throw new IOException("error with job");
    }

    return 0;

    // -
    /*
    Job job = Job.getInstance(config);
    job.setJarByClass(ThriftBulkLoader.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setInputFormatClass(ThriftFileInputFormat.class);
    //HFileOutputFormat2.configureIncrementalLoad(job, htable);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileInputFormat.setInputDirRecursive(job, true);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.addCacheFile(topicsFile.toUri());
    job.addCacheFile(vocabFile.toUri());
    job.setMapperClass(ThriftFilterMapper.class);
    boolean b = job.waitForCompletion(true);
    if (!b) {
        throw new IOException("error with job");
    }
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
    loader.doBulkLoad(new Path(outputPath), htable);
    return 0;
    */
}
From source file:edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java
License:Open Source License
/**
 * Run the DT algorithm in MapReduce
 * @param inPaths
 * @param outPath
 * @param params
 * @return
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static Job delaunayMapReduce(Path[] inPaths, Path outPath, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "Delaunay Triangulation");
    job.setJarByClass(DelaunayTriangulation.class);

    // Set map and reduce
    job.setMapperClass(DelaunayMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Triangulation.class);
    job.setReducerClass(DelaunayReduce.class);

    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inPaths);
    job.setOutputFormatClass(DelaunayTriangulationOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outPath);

    // Set column boundaries to define the boundaries of each reducer
    SpatialSite.splitReduceSpace(job, inPaths, params);

    // Submit the job
    if (!params.getBoolean("background", false)) {
        job.waitForCompletion(params.getBoolean("verbose", false));
        if (!job.isSuccessful())
            throw new RuntimeException("Job failed!");
    } else {
        job.submit();
    }
    return job;
}
From source file:edu.umn.cs.spatialHadoop.nasa.MultiHDFPlot.java
License:Open Source License
public static boolean multiplot(Path[] input, Path output, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException, ParseException {
    String timeRange = params.get("time");
    final Date dateFrom, dateTo;
    final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd");
    try {
        String[] parts = timeRange.split("\\.\\.");
        dateFrom = dateFormat.parse(parts[0]);
        dateTo = dateFormat.parse(parts[1]);
    } catch (ArrayIndexOutOfBoundsException e) {
        System.err.println("Use the seperator two periods '..' to seperate from and to dates");
        return false; // To avoid an error that causes dateFrom to be uninitialized
    } catch (ParseException e) {
        System.err.println("Illegal date format in " + timeRange);
        return false;
    }
    // Number of frames to combine in each image
    int combine = params.getInt("combine", 1);
    // Retrieve all matching input directories based on date range
    Vector<Path> matchingPathsV = new Vector<Path>();
    for (Path inputFile : input) {
        FileSystem inFs = inputFile.getFileSystem(params);
        FileStatus[] matchingDirs = inFs.listStatus(input, new PathFilter() {
            @Override
            public boolean accept(Path p) {
                String dirName = p.getName();
                try {
                    Date date = dateFormat.parse(dirName);
                    return date.compareTo(dateFrom) >= 0 && date.compareTo(dateTo) <= 0;
                } catch (ParseException e) {
                    LOG.warn("Cannot parse directory name: " + dirName);
                    return false;
                }
            }
        });
        for (FileStatus matchingDir : matchingDirs)
            matchingPathsV.add(new Path(matchingDir.getPath(), "*.hdf"));
    }
    if (matchingPathsV.isEmpty()) {
        LOG.warn("No matching directories to given input");
        return false;
    }
    Path[] matchingPaths = matchingPathsV.toArray(new Path[matchingPathsV.size()]);
    Arrays.sort(matchingPaths);

    // Clear all paths to ensure we set our own paths for each job
    params.clearAllPaths();

    // Create a water mask if we need to recover holes on write
    if (params.get("recover", "none").equals("write")) {
        // Recover images on write requires a water mask image to be generated first
        OperationsParams wmParams = new OperationsParams(params);
        wmParams.setBoolean("background", false);
        Path wmImage = new Path(output, new Path("water_mask"));
        HDFPlot.generateWaterMask(wmImage, wmParams);
        params.set(HDFPlot.PREPROCESSED_WATERMARK, wmImage.toString());
    }

    // Start a job for each path
    int imageWidth = -1;
    int imageHeight = -1;
    boolean overwrite = params.getBoolean("overwrite", false);
    boolean pyramid = params.getBoolean("pyramid", false);
    FileSystem outFs = output.getFileSystem(params);
    Vector<Job> jobs = new Vector<Job>();
    boolean background = params.getBoolean("background", false);
    Rectangle mbr = new Rectangle(-180, -90, 180, 90);
    for (int i = 0; i < matchingPaths.length; i += combine) {
        Path[] inputPaths = new Path[Math.min(combine, matchingPaths.length - i)];
        System.arraycopy(matchingPaths, i, inputPaths, 0, inputPaths.length);
        Path outputPath = new Path(output, inputPaths[0].getParent().getName() + (pyramid ? "" : ".png"));
        if (overwrite || !outFs.exists(outputPath)) {
            // Need to plot
            Job rj = HDFPlot.plotHeatMap(inputPaths, outputPath, params);
            if (imageHeight == -1 || imageWidth == -1) {
                if (rj != null) {
                    imageHeight = rj.getConfiguration().getInt("height", 1000);
                    imageWidth = rj.getConfiguration().getInt("width", 1000);
                    mbr = (Rectangle) OperationsParams.getShape(rj.getConfiguration(), "mbr");
                } else {
                    imageHeight = params.getInt("height", 1000);
                    imageWidth = params.getInt("width", 1000);
                    mbr = (Rectangle) OperationsParams.getShape(params, "mbr");
                }
            }
            if (background && rj != null)
                jobs.add(rj);
        }
    }

    // Wait until all jobs are done
    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob.getJobID());
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            throw new RuntimeException("Error running job " + firstJob.getJobID());
        }
        jobs.remove(0);
    }

    // Draw the scale in the output path if needed
    String scalerange = params.get("scalerange");
    if (scalerange != null) {
        String[] parts = scalerange.split("\\.\\.");
        double min = Double.parseDouble(parts[0]);
        double max = Double.parseDouble(parts[1]);
        String scale = params.get("scale", "none").toLowerCase();
        if (scale.equals("vertical")) {
            MultiHDFPlot.drawVerticalScale(new Path(output, "scale.png"), min, max, 64, imageHeight, params);
        } else if (scale.equals("horizontal")) {
            MultiHDFPlot.drawHorizontalScale(new Path(output, "scale.png"), min, max, imageWidth, 64, params);
        }
    }
    // Add the KML file
    createKML(outFs, output, mbr, params);
    return true;
}