List of usage examples for org.apache.hadoop.mapreduce.Job#getJobID()
public JobID getJobID()
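Job#getJobID() returns the JobID assigned to the job once it has been submitted to the cluster; before submission it may return null, which is why several of the examples below null-check the result before using it. As a minimal sketch of the typical pattern (the class name, job name, and the omitted mapper/reducer and path setup are illustrative assumptions, not taken from any example below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;

public class GetJobIdExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // "example-job" is a hypothetical job name used only for this sketch
        Job job = Job.getInstance(conf, "example-job");
        job.setJarByClass(GetJobIdExample.class);
        // ... mapper/reducer classes and input/output paths would be configured here ...

        job.submit();                    // the JobID is assigned at submission time
        JobID jobId = job.getJobID();    // may be null if the job was never submitted
        if (jobId != null) {
            System.out.println("Submitted job with ID: " + jobId.toString());
        }
        job.waitForCompletion(true);
    }
}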
From source file:kogiri.common.report.Report.java
License:Open Source License
private String makeText(Job job) {
    String jobName = job.getJobName();
    String jobID = job.getJobID().toString();

    String jobStatus;
    try {
        jobStatus = job.getJobState().name();
    } catch (IOException ex) {
        jobStatus = "Unknown";
    } catch (InterruptedException ex) {
        jobStatus = "Unknown";
    }

    String startTimeStr;
    try {
        startTimeStr = TimeHelper.getTimeString(job.getStartTime());
    } catch (Exception ex) {
        startTimeStr = "Unknown";
    }

    String finishTimeStr;
    try {
        finishTimeStr = TimeHelper.getTimeString(job.getFinishTime());
    } catch (Exception ex) {
        finishTimeStr = "Unknown";
    }

    String timeTakenStr;
    try {
        timeTakenStr = TimeHelper.getDiffTimeString(job.getStartTime(), job.getFinishTime());
    } catch (Exception ex) {
        timeTakenStr = "Unknown";
    }

    String countersStr;
    try {
        countersStr = job.getCounters().toString();
    } catch (Exception ex) {
        countersStr = "Unknown";
    }

    return "Job : " + jobName + "\n" +
            "JobID : " + jobID + "\n" +
            "Status : " + jobStatus + "\n" +
            "StartTime : " + startTimeStr + "\n" +
            "FinishTime : " + finishTimeStr + "\n" +
            "TimeTaken : " + timeTakenStr + "\n\n" +
            countersStr;
}
From source file:org.apache.druid.indexer.DeterminePartitionsJob.java
License:Apache License
@Override
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
            throw new ISE(
                    "DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]",
                    config.getPartitionsSpec());
        }
        final SingleDimensionPartitionsSpec partitionsSpec =
                (SingleDimensionPartitionsSpec) config.getPartitionsSpec();

        if (!partitionsSpec.isAssumeGrouped()) {
            groupByJob = Job.getInstance(new Configuration(), StringUtils.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            JobHelper.injectSystemProperties(groupByJob);
            config.addJobProperties(groupByJob);

            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                    JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            // Store the jobId in the file
            if (groupByJob.getJobID() != null) {
                JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), groupByJob.getJobID().toString());
            }

            try {
                if (!groupByJob.waitForCompletion(true)) {
                    log.error("Job failed: %s", groupByJob.getJobID());
                    failureCause = Utils.getFailureMessage(groupByJob, config.JSON_MAPPER);
                    return false;
                }
            } catch (IOException ioe) {
                if (!Utils.checkAppSuccessForJobIOException(ioe, groupByJob,
                        config.isUseYarnRMJobStatusFallback())) {
                    throw ioe;
                }
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = Job.getInstance(new Configuration(), StringUtils.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        JobHelper.injectSystemProperties(dimSelectionJob);
        config.addJobProperties(dimSelectionJob);

        if (!partitionsSpec.isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob,
                DeterminePartitionsDimSelectionPartitioner.class);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                JobHelper.distributedClassPath(config.makeIntermediatePath()), dimSelectionJob);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        // Store the jobId in the file
        if (dimSelectionJob.getJobID() != null) {
            JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(),
                    dimSelectionJob.getJobID().toString());
        }

        try {
            if (!dimSelectionJob.waitForCompletion(true)) {
                log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
                failureCause = Utils.getFailureMessage(dimSelectionJob, config.JSON_MAPPER);
                return false;
            }
        } catch (IOException ioe) {
            if (!Utils.checkAppSuccessForJobIOException(ioe, dimSelectionJob,
                    config.isUseYarnRMJobStatusFallback())) {
                throw ioe;
            }
        }

        /*
         * Load partitions determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>();
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
                List<ShardSpec> specs = config.JSON_MAPPER.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i));
                }

                shardSpecs.put(segmentGranularity.getStartMillis(), actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public static void cleanup(Job job) throws IOException {
    final Path jobDir = getJobPath(job.getJobID(), job.getWorkingDirectory());
    final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
    RuntimeException e = null;
    try {
        JobHelper.deleteWithRetry(fs, jobDir, true);
    } catch (RuntimeException ex) {
        e = ex;
    }
    try {
        JobHelper.deleteWithRetry(fs, getJobClassPathDir(job.getJobName(), job.getWorkingDirectory()), true);
    } catch (RuntimeException ex) {
        if (e == null) {
            e = ex;
        } else {
            e.addSuppressed(ex);
        }
    }
    if (e != null) {
        throw e;
    }
}
From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
From source file:org.apache.druid.indexer.Utils.java
License:Apache License
private static void checkAppSuccessFromYarnRMOnce(HttpClient httpClient, Job job, AtomicBoolean succeeded)
        throws IOException, InterruptedException, ExecutionException, TimeoutException {
    String appId = StringUtils.replace(job.getJobID().toString(), "job", "application");
    String yarnRM = job.getConfiguration().get("yarn.resourcemanager.webapp.address");
    String yarnEndpoint = StringUtils.format("http://%s/ws/v1/cluster/apps/%s", yarnRM, appId);
    log.info("Attempting to retrieve app status from YARN ResourceManager at [%s].", yarnEndpoint);

    ContentResponse res = httpClient.GET(yarnEndpoint);
    log.info("App status response from YARN RM: " + res.getContentAsString());
    Map<String, Object> respMap = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(res.getContentAsString(),
            new TypeReference<Map<String, Object>>() {
            });

    Map<String, Object> appMap = (Map<String, Object>) respMap.get("app");
    String state = (String) appMap.get("state");
    String finalStatus = (String) appMap.get("finalStatus");

    if ("FINISHED".equals(state) && "SUCCEEDED".equals(finalStatus)) {
        succeeded.set(true);
    }
}
From source file:org.apache.falcon.hive.HiveDRTool.java
License:Apache License
public Job execute() throws Exception {
    assert inputOptions != null;
    assert getConf() != null;

    executionStage = inputOptions.getExecutionStage();
    LOG.info("Executing Workflow stage : {}", executionStage);
    if (executionStage.equalsIgnoreCase(HiveDRUtils.ExecutionStage.LASTEVENTS.name())) {
        String lastEventsIdFile = getLastEvents(jobConf);
        LOG.info("Last successfully replicated Event file : {}", lastEventsIdFile);
        return null;
    } else if (executionStage.equalsIgnoreCase(HiveDRUtils.ExecutionStage.EXPORT.name())) {
        createStagingDirectory();
        eventsMetaFile = sourceEvents();
        LOG.info("Sourced Events meta file : {}", eventsMetaFile);
        if (StringUtils.isEmpty(eventsMetaFile)) {
            LOG.info("No events to process");
            return null;
        } else {
            /*
             * eventsMetaFile contains the events to be processed by HiveDr. This file should be available
             * for the import action as well. Persist the file at a location common to both export and import.
             */
            persistEventsMetafileLocation(eventsMetaFile);
        }
    } else if (executionStage.equalsIgnoreCase(HiveDRUtils.ExecutionStage.IMPORT.name())) {
        // read the location of eventsMetaFile from hdfs
        eventsMetaFile = getEventsMetaFileLocation();
        if (StringUtils.isEmpty(eventsMetaFile)) {
            LOG.info("No events to process");
            return null;
        }
    } else {
        throw new HiveReplicationException("Invalid Execution stage : " + inputOptions.getExecutionStage());
    }

    Job job = createJob();
    job.submit();

    String jobID = job.getJobID().toString();
    job.getConfiguration().set("HIVEDR_JOB_ID", jobID);

    LOG.info("HiveDR job-id: {}", jobID);
    if (inputOptions.shouldBlock() && !job.waitForCompletion(true)) {
        throw new IOException(
                "HiveDR failure: Job " + jobID + " has failed: " + job.getStatus().getFailureInfo());
    }

    return job;
}
From source file:org.apache.falcon.hive.util.EventUtils.java
License:Apache License
public void invokeCopy() throws Exception {
    DistCpOptions options = getDistCpOptions();
    DistCp distCp = new DistCp(conf, options);
    LOG.info("Started DistCp with source Path: {} \ttarget path: {}", sourceStagingUri, targetStagingUri);

    Job distcpJob = distCp.execute();
    LOG.info("Distp Hadoop job: {}", distcpJob.getJobID().toString());
    LOG.info("Completed DistCp");
    if (distcpJob.getStatus().getState() == JobStatus.State.SUCCEEDED) {
        countersMap = HiveDRUtils.fetchReplicationCounters(conf, distcpJob);
    }
}
From source file:org.apache.falcon.snapshots.replication.HdfsSnapshotReplicator.java
License:Apache License
protected void invokeCopy(String sourceStorageUrl, String targetStorageUrl, DistributedFileSystem sourceFs,
        DistributedFileSystem targetFs, String sourceDir, String targetDir, String currentSnapshotName)
        throws FalconException {
    try {
        Configuration jobConf = this.getConf();
        DistCpOptions options = getDistCpOptions(sourceStorageUrl, targetStorageUrl, sourceFs, targetFs,
                sourceDir, targetDir, currentSnapshotName);

        DistCp distCp = new DistCp(jobConf, options);
        LOG.info("Started Snapshot based DistCp from {} to {} ", getStagingUri(sourceStorageUrl, sourceDir),
                getStagingUri(targetStorageUrl, targetDir));
        Job distcpJob = distCp.execute();
        LOG.info("Distp Hadoop job: {}", distcpJob.getJobID().toString());
        LOG.info("Completed Snapshot based DistCp");
    } catch (FalconException fe) {
        throw fe;
    } catch (Exception e) {
        throw new FalconException("Unable to replicate HDFS directory using snapshots.", e);
    }
}
From source file:org.apache.giraph.job.HadoopUtils.java
License:Apache License
/**
 * Get Job ID from job.
 * May return null for hadoop 0.20.203
 *
 * @param job submitted job
 * @return JobId for submitted job.
 */
public static JobID getJobID(Job job) {
    /*if[HADOOP_JOB_ID_AVAILABLE]
    return job.getID();
    else[HADOOP_JOB_ID_AVAILABLE]*/
    return job.getJobID();
    /*end[HADOOP_JOB_ID_AVAILABLE]*/
}
From source file:org.apache.hadoop.examples.QuasiMonteCarlo.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(conf);
    //setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        //generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        //start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = Time.monotonicNow();
        job.waitForCompletion(true);
        if (!job.isSuccessful()) {
            System.out.println("Job " + job.getJobID() + " failed!");
            System.exit(1);
        }
        final double duration = (Time.monotonicNow() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        //read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        //compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(numTotal, RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}