List of usage examples for org.apache.hadoop.mapreduce.Job#isSuccessful()
public boolean isSuccessful() throws IOException
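isSuccessful() reports whether the job reached the SUCCEEDED state, so it is normally called after waitForCompletion() (or after polling isComplete()) and before reading any output or counters. The following is a minimal sketch of that pattern, not taken from the examples below; the class name and the input/output arguments are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class IsSuccessfulExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "isSuccessful example");
        job.setJarByClass(IsSuccessfulExample.class);

        // Identity mapper, no reducer: copies input records straight to the output directory.
        job.setMapperClass(Mapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextInputFormat.addInputPath(job, new Path(args[0]));     // placeholder input path
        TextOutputFormat.setOutputPath(job, new Path(args[1]));   // placeholder output path

        // Block until the job finishes, then check the final state before trusting the output.
        job.waitForCompletion(true);
        if (!job.isSuccessful()) {
            System.err.println("Job failed: " + job.getJobName());
            System.exit(1);
        }
        System.out.println("Job succeeded: " + job.getTrackingURL());
    }
}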
From source file:gaffer.accumulo.inputformat.example.ExampleDriver.java
License:Apache License
public int run(String[] args) throws Exception {
    // Usage
    if (args.length != 6 && args.length != 7) {
        System.err.println(USAGE);
        return 1;
    }

    // Parse options
    Path outputPath = new Path(args[0]);
    String accumuloPropertiesFile = args[1];
    int numReduceTasks;
    try {
        numReduceTasks = Integer.parseInt(args[2]);
    } catch (NumberFormatException e) {
        System.err.println(USAGE);
        return 1;
    }
    Date startDate = null;
    Date endDate = null;
    boolean useTimeWindow = false;
    if (!args[3].equals("null") && !args[4].equals("null")) {
        try {
            startDate = DATE_FORMAT.parse(args[3]);
            endDate = DATE_FORMAT.parse(args[4]);
        } catch (ParseException e) {
            System.err.println("Error parsing dates: " + args[3] + " " + args[4] + " " + e.getMessage());
            return 1;
        }
        useTimeWindow = true;
    }
    boolean rollUpOverTimeAndVisibility = Boolean.parseBoolean(args[5]);
    boolean seedsSpecified = (args.length == 7);
    String seedsFile = "";
    if (seedsSpecified) {
        seedsFile = args[6];
    }

    // Hadoop configuration
    Configuration conf = getConf();

    // Connect to Accumulo, so we can check the connection and check that the table exists
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();
    Authorizations authorizations = conn.securityOperations().getUserAuthorizations(accConf.getUserName());

    // Check if the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist.");
        return 1;
    }

    // Create AccumuloBackedGraph and set view
    AccumuloBackedGraph graph = new AccumuloBackedGraph(conn, tableName);
    // - Time window
    if (useTimeWindow) {
        graph.setTimeWindow(startDate, endDate);
    }
    // - Roll up over time and visibility iterator
    graph.rollUpOverTimeAndVisibility(rollUpOverTimeAndVisibility);
    // - If seeds have been specified, read them from the seeds file and restrict the view to them
    if (seedsSpecified) {
        Set<TypeValue> typeValues = new HashSet<TypeValue>();
        BufferedReader reader = new BufferedReader(new FileReader(seedsFile));
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\\|");
            if (tokens.length != 2) {
                System.err.println("Invalid line: " + line);
                continue;
            }
            String type = tokens[0];
            String value = tokens[1];
            typeValues.add(new TypeValue(type, value));
        }
        reader.close();
        // Use AccumuloBackedGraph to update the configuration with the view added above
        graph.setConfiguration(conf, typeValues, accConf);
    } else {
        // Use AccumuloBackedGraph to update the configuration with the view added above
        graph.setConfiguration(conf, accConf);
    }

    // Conf
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    // Job
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Example MapReduce against Gaffer data in Accumulo format: input = " + tableName
            + ", output = " + outputPath);

    // Input format - use BatchScannerElementInputFormat if seeds have been specified (as that creates fewer
    // splits); otherwise use ElementInputFormat which is based on the standard AccumuloInputFormat.
    if (seedsSpecified) {
        job.setInputFormatClass(BatchScannerElementInputFormat.class);
    } else {
        job.setInputFormatClass(ElementInputFormat.class);
    }

    // Mapper
    job.setMapperClass(ExampleMapper.class);
    job.setMapOutputKeyClass(GraphElement.class);
    job.setMapOutputValueClass(SetOfStatistics.class);

    // Reducer - use default IdentityReducer for this example
    job.setOutputKeyClass(GraphElement.class);
    job.setOutputValueClass(SetOfStatistics.class);
    job.setNumReduceTasks(numReduceTasks);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    System.out.println("Running MapReduce job over:");
    System.out.println("\tTable: " + accConf.getTable());
    System.out.println("\tUser: " + accConf.getUserName());
    System.out.println("\tAuths: " + authorizations);
    if (useTimeWindow) {
        System.out.println("\tFilter by time: start time is " + DATE_FORMAT.format(startDate) + ", "
                + DATE_FORMAT.format(endDate));
    } else {
        System.out.println("\tFilter by time is off");
    }
    System.out.println("\tRoll up over time and visibility: " + rollUpOverTimeAndVisibility);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }
    return 0;
}
From source file:gaffer.accumulo.splitpoints.EstimateSplitPointsDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 5) {
        System.err.println("Usage: " + this.getClass().getName()
                + " <mapred_output_directory> <proportion_to_sample> <number_of_tablet_servers>"
                + " <resulting_split_file> <input_path1>...");
        return 1;
    }

    // Parse arguments
    Path outputPath = new Path(args[0]);
    float proportionToSample = Float.parseFloat(args[1]);
    int numberTabletServers = Integer.parseInt(args[2]);
    Path resultingSplitsFile = new Path(args[3]);
    Path[] inputPaths = new Path[args.length - 4];
    for (int i = 0; i < inputPaths.length; i++) {
        inputPaths[i] = new Path(args[i + 4]);
    }

    // Conf and job
    Configuration conf = getConf();
    conf.setFloat("proportion_to_sample", proportionToSample);
    String jobName = "Estimate split points: input = ";
    for (int i = 0; i < inputPaths.length; i++) {
        jobName += inputPaths[i] + ", ";
    }
    jobName += "output = " + outputPath;
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(getClass());

    // Input
    job.setInputFormatClass(SequenceFileInputFormat.class);
    for (int i = 0; i < inputPaths.length; i++) {
        SequenceFileInputFormat.addInputPath(job, inputPaths[i]);
    }

    // Mapper
    job.setMapperClass(EstimateSplitPointsMapper.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    // Reducer
    job.setReducerClass(EstimateSplitPointsReducer.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    job.setNumReduceTasks(1);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    // Number of records output
    // NB In the following line use mapred.Task.Counter.REDUCE_OUTPUT_RECORDS rather than
    // mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS as this is more compatible with earlier
    // versions of Hadoop.
    @SuppressWarnings("deprecation")
    Counter counter = job.getCounters()
            .findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS);
    long recordsOutput = counter.getValue();
    System.out.println("Number of records output = " + recordsOutput);

    // Work out when to output a split point. The number of split points
    // needed is the number of tablet servers minus 1 (because you don't
    // have to output the start of the first tablet or the end of the
    // last tablet).
    long outputEveryNthRecord = recordsOutput / (numberTabletServers - 1);

    // Read through the resulting file, pick out the split points and write them to file.
    FileSystem fs = FileSystem.get(conf);
    Path resultsFile = new Path(outputPath, "part-r-00000");
    @SuppressWarnings("deprecation")
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, resultsFile, conf);
    PrintStream splitsWriter = new PrintStream(new BufferedOutputStream(fs.create(resultingSplitsFile, true)));
    Key key = new Key();
    Value value = new Value();
    long count = 0;
    int numberSplitPointsOutput = 0;
    while (reader.next(key, value) && numberSplitPointsOutput < numberTabletServers - 1) {
        count++;
        if (count % outputEveryNthRecord == 0) {
            numberSplitPointsOutput++;
            splitsWriter.println(new String(Base64.encodeBase64(key.getRow().getBytes())));
            System.out.println("Written split point: " + key.getRow());
        }
    }
    reader.close();
    splitsWriter.close();
    System.out.println("Number of split points output = " + numberSplitPointsOutput);
    return 0;
}
From source file:gaffer.accumulostore.operation.hdfs.handler.job.tool.FetchElementsFromHdfsTool.java
License:Apache License
@Override
public int run(final String[] strings) throws Exception {
    checkHdfsDirectories(operation);

    LOGGER.info("Ensuring table {} exists", store.getProperties().getTable());
    TableUtils.ensureTableExists(store);

    LOGGER.info("Adding elements from HDFS");
    final Job job = new AccumuloAddElementsFromHdfsJobFactory().createJob(operation, store);
    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        LOGGER.error("Error running job");
        throw new OperationException("Error running job");
    }
    LOGGER.info("Finished adding elements from HDFS");

    return SUCCESS_RESPONSE;
}
From source file:gaffer.accumulostore.operation.hdfs.handler.tool.FetchElementsFromHdfs.java
License:Apache License
@Override
public int run(final String[] strings) throws Exception {
    TableUtils.ensureTableExists(store);
    final Job job = new AccumuloAddElementsFromHdfsJobFactory().createJob(operation, store);
    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new OperationException("Error running job");
    }
    return SUCCESS_RESPONSE;
}
From source file:gaffer.analytic.impl.GraphStatistics.java
License:Apache License
public int run(String[] args) throws Exception {
    // Usage
    if (args.length != 6 && args.length != 7) {
        System.err.println(USAGE);
        return 1;
    }

    // Parse options
    Path outputPath = new Path(args[0]);
    String accumuloPropertiesFile = args[1];
    int numReduceTasks;
    try {
        numReduceTasks = Integer.parseInt(args[2]);
    } catch (NumberFormatException e) {
        System.err.println(USAGE);
        return 1;
    }
    Date startDate = null;
    Date endDate = null;
    boolean useTimeWindow = false;
    if (!args[3].equals("null") && !args[4].equals("null")) {
        try {
            startDate = DATE_FORMAT.parse(args[3]);
            endDate = DATE_FORMAT.parse(args[4]);
        } catch (ParseException e) {
            System.err.println("Error parsing dates: " + args[3] + " " + args[4] + " " + e.getMessage());
            return 1;
        }
        useTimeWindow = true;
    }
    boolean rollUpOverTimeAndVisibility = Boolean.parseBoolean(args[5]);
    boolean seedsSpecified = (args.length == 7);
    String seedsFile = "";
    if (seedsSpecified) {
        seedsFile = args[6];
    }

    // Hadoop configuration
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Connect to Accumulo, so we can check the connection and check that the table exists
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();
    Authorizations authorizations = conn.securityOperations().getUserAuthorizations(accConf.getUserName());

    // Check if the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist.");
        return 1;
    }

    // Create graph and update configuration based on the view
    AccumuloBackedGraph graph = new AccumuloBackedGraph(conn, tableName);
    if (useTimeWindow) {
        graph.setTimeWindow(startDate, endDate);
    }
    graph.rollUpOverTimeAndVisibility(rollUpOverTimeAndVisibility);
    if (seedsSpecified) {
        Set<TypeValue> typeValues = new HashSet<TypeValue>();
        BufferedReader reader = new BufferedReader(new FileReader(seedsFile));
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\\|");
            if (tokens.length != 2) {
                System.err.println("Invalid line: " + line);
                continue;
            }
            String type = tokens[0];
            String value = tokens[1];
            typeValues.add(new TypeValue(type, value));
        }
        reader.close();
        graph.setConfiguration(conf, typeValues, accConf);
    } else {
        graph.setConfiguration(conf, accConf);
    }

    // Conf
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    // Job
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Running MapReduce against Gaffer data in Accumulo: input = " + tableName
            + ", output = " + outputPath);

    // Input format - use BatchScannerElementInputFormat if seeds have been specified (as that creates fewer
    // splits); otherwise use ElementInputFormat which is based on the standard AccumuloInputFormat.
    if (seedsSpecified) {
        job.setInputFormatClass(BatchScannerElementInputFormat.class);
    } else {
        job.setInputFormatClass(ElementInputFormat.class);
    }

    // Mapper
    job.setMapperClass(GraphStatisticsMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SetOfStatistics.class);

    // Combiner
    job.setCombinerClass(GraphStatisticsReducer.class);

    // Reducer
    job.setReducerClass(GraphStatisticsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SetOfStatistics.class);
    job.setNumReduceTasks(numReduceTasks);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    System.out.println("Running MapReduce job over:");
    System.out.println("\tTable: " + accConf.getTable());
    System.out.println("\tUser: " + accConf.getUserName());
    System.out.println("\tAuths: " + authorizations);
    if (useTimeWindow) {
        System.out.println("\tFilter by time: start time is " + DATE_FORMAT.format(startDate) + ", "
                + DATE_FORMAT.format(endDate));
    } else {
        System.out.println("\tFilter by time is off");
    }
    System.out.println("\tRoll up over time and visibility: " + rollUpOverTimeAndVisibility);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    // Write results out
    System.out.println("Summary of graph");
    for (FileStatus file : fs.listStatus(outputPath)) {
        if (!file.isDirectory() && !file.getPath().getName().contains("_SUCCESS")) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
            Text text = new Text();
            SetOfStatistics stats = new SetOfStatistics();
            while (reader.next(text, stats)) {
                System.out.println(text + ", " + stats);
            }
            reader.close();
        }
    }
    return 0;
}
From source file:gobblin.compaction.mapreduce.MRCompactorJobRunner.java
License:Apache License
private void submitAndWait(Job job) throws ClassNotFoundException, IOException, InterruptedException {
    job.submit();
    MRCompactor.addRunningHadoopJob(this.dataset, job);
    LOG.info(String.format("MR job submitted for dataset %s, input %s, url: %s", this.dataset, getInputPaths(),
            job.getTrackingURL()));
    while (!job.isComplete()) {
        if (this.policy == Policy.ABORT_ASAP) {
            LOG.info(String.format("MR job for dataset %s, input %s killed due to input data incompleteness."
                    + " Will try again later", this.dataset, getInputPaths()));
            job.killJob();
            return;
        }
        Thread.sleep(MR_JOB_CHECK_COMPLETE_INTERVAL_MS);
    }
    if (!job.isSuccessful()) {
        throw new RuntimeException(String.format("MR job failed for topic %s, input %s, url: %s", this.dataset,
                getInputPaths(), job.getTrackingURL()));
    }
}
From source file:gobblin.runtime.mapreduce.MRTask.java
License:Apache License
@Override
public void run() {
    try {
        Job job = createJob();
        job.submit();
        this.eventSubmitter.submit(Events.MR_JOB_STARTED_EVENT, Events.JOB_URL, job.getTrackingURL());
        job.waitForCompletion(false);
        if (job.isSuccessful()) {
            this.eventSubmitter.submit(Events.MR_JOB_SUCCESSFUL, Events.JOB_URL, job.getTrackingURL());
            this.workingState = WorkUnitState.WorkingState.SUCCESSFUL;
        } else {
            this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.JOB_URL, job.getTrackingURL());
            this.workingState = WorkUnitState.WorkingState.FAILED;
        }
    } catch (Throwable t) {
        log.error("Failed to run MR job.", t);
        this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.FAILURE_CONTEXT, t.getMessage());
        this.workingState = WorkUnitState.WorkingState.FAILED;
    }
}
From source file:io.amient.kafka.hadoop.testutils.SystemTestBase.java
License:Apache License
protected Path runSimpleJob(String topic, String testOutputDir)
        throws InterruptedException, IOException, ClassNotFoundException {
    // Run hadoop loader job
    Path outDir = new Path(new File(dfsBaseDir, testOutputDir).toString());
    localFileSystem.delete(outDir, true);

    KafkaInputFormat.configureKafkaTopics(conf, topic);
    KafkaInputFormat.configureZkConnection(conf, zkConnect);

    Job job = Job.getInstance(conf, "kafka.hadoop.loader");
    job.setNumReduceTasks(0);
    job.setInputFormatClass(KafkaInputFormat.class);
    job.setMapperClass(HadoopJobMapper.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    MultiOutputFormat.setOutputPath(job, outDir);
    MultiOutputFormat.setCompressOutput(job, false);

    job.waitForCompletion(true);
    if (!job.isSuccessful())
        throw new Error("job failed - see logs for details");

    fs.copyToLocalFile(outDir, outDir);
    return outDir;
}
From source file:io.dataapps.chlorine.hadoop.AbstractPipeline.java
License:Apache License
private synchronized boolean waitForCompletion(Job j, boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException {
    j.submit();
    while (!j.isComplete()) {
        try {
            wait(5000);
        } catch (InterruptedException ie) {
        }
    }
    return j.isSuccessful();
}
From source file:io.hops.erasure_coding.MapReduceBlockRepairManager.java
License:Apache License
@Override
public List<Report> computeReports() {
    List<Report> reports = new ArrayList<Report>();

    for (Map.Entry<String, Job> entry : currentRepairs.entrySet()) {
        String fileName = entry.getKey();
        Job job = entry.getValue();
        try {
            if (job.isComplete() && job.isSuccessful()) {
                LOG.info("REPAIR COMPLETE");
                reports.add(new Report(fileName, Report.Status.FINISHED));
                cleanup(job);
            } else if (job.isComplete() && !job.isSuccessful()) {
                LOG.info("REPAIR FAILED");
                reports.add(new Report(fileName, Report.Status.FAILED));
                cleanup(job);
            }
            /* TODO FIX timeout
            else if (System.currentTimeMillis() - job.getStartTime() > getMaxFixTimeForFile()) {
                LOG.info("Timeout: " + (System.currentTimeMillis() - job.getStartTime()) + " "
                        + job.getStartTime());
                job.killJob();
                reports.add(new Report(fileName, Report.Status.CANCELED));
                cleanup(job);
            }*/
            else {
                LOG.info("REPAIR RUNNING");
                reports.add(new Report(fileName, Report.Status.ACTIVE));
            }
        } catch (Exception e) {
            LOG.info("Exception during completeness check", e);
            try {
                job.killJob();
            } catch (Exception e1) {
            }
            reports.add(new Report(fileName, Report.Status.FAILED));
            cleanup(job);
        }
    }

    for (Report report : reports) {
        Report.Status status = report.getStatus();
        if (status == Report.Status.FINISHED || status == Report.Status.FAILED
                || status == Report.Status.CANCELED) {
            currentRepairs.remove(report.getFilePath());
        }
    }
    return reports;
}