List of usage examples for org.apache.hadoop.mapreduce Job getJobName
public String getJobName()
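Before the project examples below, here is a minimal, self-contained sketch of the call in isolation. The class name JobNameExample and the job name "word-count" are illustrative assumptions, not taken from any of the listed projects; getJobName() simply returns the name the Job was configured with.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobNameExample {
    public static void main(String[] args) throws Exception {
        // Create a Job with an explicit name, then read the name back.
        // The name "word-count" is purely illustrative.
        Job job = Job.getInstance(new Configuration(), "word-count");
        System.out.println("Configured job name: " + job.getJobName());
    }
}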
From source file:io.dataapps.chlorine.hadoop.AbstractPipeline.java
License:Apache License
protected boolean runJobToCompletion(Job j) {
    LOG.info("***********Run job: " + j.getJobName());
    try {
        if (!waitForCompletion(j, true)) {
            LOG.error("Job " + j.getJobName() + " failed.");
            return false;
        }
    } catch (Exception e) {
        LOG.error(e);
        return false;
    }
    LOG.info("Job " + j.getJobName() + " successful.");
    return true;
}
From source file:io.druid.indexer.DetermineHashedPartitionsJob.java
License:Apache License
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        long startTime = System.currentTimeMillis();
        final Job groupByJob = Job.getInstance(new Configuration(), String
                .format("%s-determine_partitions_hashed-%s", config.getDataSource(), config.getIntervals()));

        JobHelper.injectSystemProperties(groupByJob);
        config.addJobProperties(groupByJob);
        groupByJob.setMapperClass(DetermineCardinalityMapper.class);
        groupByJob.setMapOutputKeyClass(LongWritable.class);
        groupByJob.setMapOutputValueClass(BytesWritable.class);
        groupByJob.setReducerClass(DetermineCardinalityReducer.class);
        groupByJob.setOutputKeyClass(NullWritable.class);
        groupByJob.setOutputValueClass(NullWritable.class);
        groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        groupByJob.setPartitionerClass(DetermineHashedPartitionsPartitioner.class);
        if (!config.getSegmentGranularIntervals().isPresent()) {
            groupByJob.setNumReduceTasks(1);
        } else {
            groupByJob.setNumReduceTasks(config.getSegmentGranularIntervals().get().size());
        }
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);
        config.addInputPaths(groupByJob);
        config.intoConfiguration(groupByJob);
        FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

        groupByJob.submit();
        log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                groupByJob.getTrackingURL());

        if (!groupByJob.waitForCompletion(true)) {
            log.error("Job failed: %s", groupByJob.getJobID());
            return false;
        }

        /*
         * Load partitions and intervals determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        if (!config.getSegmentGranularIntervals().isPresent()) {
            final Path intervalInfoPath = config.makeIntervalInfoPath();
            fileSystem = intervalInfoPath.getFileSystem(groupByJob.getConfiguration());
            if (!Utils.exists(groupByJob, fileSystem, intervalInfoPath)) {
                throw new ISE("Path[%s] didn't exist!?", intervalInfoPath);
            }
            List<Interval> intervals = config.jsonMapper.readValue(
                    Utils.openInputStream(groupByJob, intervalInfoPath), new TypeReference<List<Interval>>() {
                    });
            config.setGranularitySpec(
                    new UniformGranularitySpec(config.getGranularitySpec().getSegmentGranularity(),
                            config.getGranularitySpec().getQueryGranularity(), intervals));
            log.info("Determined Intervals for Job [%s]" + config.getSegmentGranularIntervals());
        }
        Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            DateTime bucket = segmentGranularity.getStart();

            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(groupByJob.getConfiguration());
            }
            if (Utils.exists(groupByJob, fileSystem, partitionInfoPath)) {
                final Long numRows = config.jsonMapper.readValue(
                        Utils.openInputStream(groupByJob, partitionInfoPath), new TypeReference<Long>() {
                        });

                log.info("Found approximately [%,d] rows in data.", numRows);

                final int numberOfShards = (int) Math.ceil((double) numRows / config.getTargetPartitionSize());

                log.info("Creating [%,d] shards", numberOfShards);

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(numberOfShards);
                if (numberOfShards == 1) {
                    actualSpecs.add(new HadoopyShardSpec(new NoneShardSpec(), shardCount++));
                } else {
                    for (int i = 0; i < numberOfShards; ++i) {
                        actualSpecs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, numberOfShards,
                                HadoopDruidIndexerConfig.jsonMapper), shardCount++));
                        log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                    }
                }
                shardSpecs.put(bucket, actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);
        log.info("DetermineHashedPartitionsJob took %d millis", (System.currentTimeMillis() - startTime));

        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
From source file:io.druid.indexer.DeterminePartitionsJob.java
License:Apache License
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
            throw new ISE(
                    "DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]",
                    config.getPartitionsSpec());
        }

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            final Job groupByJob = Job.getInstance(new Configuration(), String.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            JobHelper.injectSystemProperties(groupByJob);
            config.addJobProperties(groupByJob);

            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                    JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                return false;
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = Job.getInstance(new Configuration(), String.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        JobHelper.injectSystemProperties(dimSelectionJob);
        config.addJobProperties(dimSelectionJob);

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setPartitionerClass(DeterminePartitionsDimSelectionPartitioner.class);
        dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                JobHelper.distributedClassPath(config.makeIntermediatePath()), dimSelectionJob);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        if (!dimSelectionJob.waitForCompletion(true)) {
            log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
            return false;
        }

        /*
         * Load partitions determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
                List<ShardSpec> specs = config.jsonMapper.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i,
                            actualSpecs.get(i));
                }

                shardSpecs.put(segmentGranularity.getStart(), actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }

        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
From source file:io.druid.indexer.IndexGeneratorJob.java
License:Apache License
public boolean run() {
    try {
        Job job = Job.getInstance(new Configuration(),
                String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()));

        job.getConfiguration().set("io.sort.record.percent", "0.23");

        JobHelper.injectSystemProperties(job);
        config.addJobProperties(job);

        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(BytesWritable.class);

        SortableBytes.useSortableBytesAsMapOutputKey(job);

        int numReducers = Iterables.size(config.getAllBuckets().get());
        if (numReducers == 0) {
            throw new RuntimeException("No buckets?? seems there is no data to index.");
        }

        if (config.getSchema().getTuningConfig().getUseCombiner()) {
            job.setCombinerClass(IndexGeneratorCombiner.class);
            job.setCombinerKeyGroupingComparatorClass(BytesWritable.Comparator.class);
        }

        job.setNumReduceTasks(numReducers);
        job.setPartitionerClass(IndexGeneratorPartitioner.class);

        setReducerClass(job);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);

        // hack to get druid.processing.bitmap property passed down to hadoop job.
        // once IndexIO doesn't rely on globally injected properties, we can move this into the HadoopTuningConfig.
        final String bitmapProperty = "druid.processing.bitmap.type";
        final String bitmapType = HadoopDruidIndexerConfig.properties.getProperty(bitmapProperty);
        if (bitmapType != null) {
            for (String property : new String[] { "mapreduce.reduce.java.opts", "mapreduce.map.java.opts" }) {
                // prepend property to allow overriding using hadoop.xxx properties by JobHelper.injectSystemProperties above
                String value = Strings.nullToEmpty(job.getConfiguration().get(property));
                job.getConfiguration().set(property,
                        String.format("-D%s=%s %s", bitmapProperty, bitmapType, value));
            }
        }

        config.intoConfiguration(job);

        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                JobHelper.distributedClassPath(config.makeIntermediatePath()), job);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        boolean success = job.waitForCompletion(true);

        Counter invalidRowCount = job.getCounters()
                .findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRowCount.getValue());

        return success;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:io.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public static void cleanup(Job job) throws IOException {
    final Path jobDir = getJobPath(job.getJobID(), job.getWorkingDirectory());
    final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
    fs.delete(jobDir, true);
    fs.delete(getJobClassPathDir(job.getJobName(), job.getWorkingDirectory()), true);
}
From source file:io.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
From source file:io.hops.erasure_coding.MapReduceBlockRepairManager.java
License:Apache License
void submitJob(Job job) throws IOException, InterruptedException, ClassNotFoundException {
    job.submit();
    LOG.info("Job " + job.getJobID() + "(" + job.getJobName() + ") started");
}
From source file:kogiri.common.report.Report.java
License:Open Source License
private String makeText(Job job) {
    String jobName = job.getJobName();
    String jobID = job.getJobID().toString();

    String jobStatus;
    try {
        jobStatus = job.getJobState().name();
    } catch (IOException ex) {
        jobStatus = "Unknown";
    } catch (InterruptedException ex) {
        jobStatus = "Unknown";
    }

    String startTimeStr;
    try {
        startTimeStr = TimeHelper.getTimeString(job.getStartTime());
    } catch (Exception ex) {
        startTimeStr = "Unknown";
    }

    String finishTimeStr;
    try {
        finishTimeStr = TimeHelper.getTimeString(job.getFinishTime());
    } catch (Exception ex) {
        finishTimeStr = "Unknown";
    }

    String timeTakenStr;
    try {
        timeTakenStr = TimeHelper.getDiffTimeString(job.getStartTime(), job.getFinishTime());
    } catch (Exception ex) {
        timeTakenStr = "Unknown";
    }

    String countersStr;
    try {
        countersStr = job.getCounters().toString();
    } catch (Exception ex) {
        countersStr = "Unknown";
    }

    return "Job : " + jobName + "\n" + "JobID : " + jobID + "\n" + "Status : " + jobStatus + "\n"
            + "StartTime : " + startTimeStr + "\n" + "FinishTime : " + finishTimeStr + "\n"
            + "TimeTaken : " + timeTakenStr + "\n\n" + countersStr;
}
From source file:ml.shifu.guagua.mapreduce.GuaguaMapReduceClient.java
License:Apache License
/**
 * Add new job to JobControl instance.
 */
public synchronized void addJob(String[] args) throws IOException {
    Job job = createJob(args);
    this.jc.addJob(new ControlledJob(job, null));
    if (this.jobIndexMap.containsKey(job.getJobName())) {
        throw new IllegalStateException(
                "Job name should be unique. please check name with: " + job.getJobName());
    }
    this.jobIndexMap.put(job.getJobName(), this.jobIndex);
    this.jobIndexParams.put(this.jobIndex, args);
    this.jobRunningTimes.put(this.jobIndex, 1);
    this.jobIndex += 1;
}
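The check above keys jobs by getJobName(), so every job handed to addJob needs a distinct name. A minimal, self-contained sketch of the same bookkeeping pattern is shown below; it is not Guagua code, and the class name JobRegistrySketch and the "guagua-iteration-*" names are illustrative assumptions.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobRegistrySketch {
    private final Map<String, Job> jobsByName = new HashMap<>();

    // Register a job, rejecting duplicate names the same way addJob() above does.
    public synchronized void register(Job job) {
        String name = job.getJobName();
        if (jobsByName.containsKey(name)) {
            throw new IllegalStateException("Job name should be unique. please check name with: " + name);
        }
        jobsByName.put(name, job);
    }

    public static void main(String[] args) throws Exception {
        JobRegistrySketch registry = new JobRegistrySketch();
        Job first = Job.getInstance(new Configuration(), "guagua-iteration-1");  // illustrative name
        registry.register(first);
        Job second = Job.getInstance(new Configuration(), "guagua-iteration-2"); // distinct name, accepted
        registry.register(second);
    }
}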
From source file:ml.shifu.guagua.mapreduce.GuaguaMapReduceClient.java
License:Apache License
/**
 * Run all jobs added to JobControl.
 */
public void run() throws IOException {
    // Initially, all jobs are in wait state.
    List<ControlledJob> jobsWithoutIds = this.jc.getWaitingJobList();
    int totalNeededMRJobs = jobsWithoutIds.size();
    LOG.info("{} map-reduce job(s) waiting for submission.", jobsWithoutIds.size());
    Thread jcThread = new Thread(this.jc, "Guagua-MapReduce-JobControl");
    jcThread.start();

    JobClient jobClient = new JobClient(new JobConf(new Configuration()));
    double lastProg = -1;

    Set<String> sucessfulJobs = new HashSet<String>();
    while (!this.jc.allFinished()) {
        try {
            jcThread.join(1000);
        } catch (InterruptedException ignore) {
            Thread.currentThread().interrupt();
        }
        List<ControlledJob> jobsAssignedIdInThisRun = new ArrayList<ControlledJob>(totalNeededMRJobs);

        for (ControlledJob job : jobsWithoutIds) {
            if (job.getJob().getJobID() != null) {
                jobsAssignedIdInThisRun.add(job);
                LOG.info("Job {} is started.", job.getJob().getJobID().toString());
            } else {
                // This job is not assigned an id yet.
            }
        }
        jobsWithoutIds.removeAll(jobsAssignedIdInThisRun);

        List<ControlledJob> successfulJobs = jc.getSuccessfulJobList();
        for (ControlledJob controlledJob : successfulJobs) {
            String jobId = controlledJob.getJob().getJobID().toString();
            if (!sucessfulJobs.contains(jobId)) {
                LOG.info("Job {} is successful.", jobId);
                sucessfulJobs.add(jobId);
            }
        }

        List<ControlledJob> failedJobs = jc.getFailedJobList();
        for (ControlledJob controlledJob : failedJobs) {
            String failedJobId = controlledJob.getJob().getJobID().toString();
            if (!this.failedCheckingJobs.contains(failedJobId)) {
                this.failedCheckingJobs.add(failedJobId);
                String jobName = controlledJob.getJob().getJobName();

                Integer jobIndex = this.jobIndexMap.get(jobName);
                Integer runTimes = this.jobRunningTimes.get(jobIndex);
                if (runTimes <= 1) {
                    LOG.warn("Job {} is failed, will be submitted again.", jobName);
                    Job newJob = createJob(this.jobIndexParams.get(jobIndex));
                    this.jc.addJob(new ControlledJob(newJob, null));
                    this.jobRunningTimes.put(jobIndex, runTimes + 1);
                    this.jobIndexMap.put(newJob.getJobName(), jobIndex);
                    jobsWithoutIds = this.jc.getWaitingJobList();
                } else {
                    LOG.warn("Job {} is failed twice, will not be submitted again.", jobName);
                }
            }
        }
        double prog = calculateProgress(jc, jobClient) / totalNeededMRJobs;
        notifyProgress(prog, lastProg);
        lastProg = prog;

        try {
            Thread.sleep(2 * 1000);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    List<ControlledJob> successfulJobs = jc.getSuccessfulJobList();
    LOG.info("Sucessful jobs:");
    for (ControlledJob controlledJob : successfulJobs) {
        LOG.info("Job: {} ", controlledJob);
    }
    if (totalNeededMRJobs == successfulJobs.size()) {
        LOG.info("Guagua jobs: 100% complete");
        // add failed jobs to debug since all jobs are finished.
        List<ControlledJob> failedJobs = jc.getFailedJobList();
        if (failedJobs != null && failedJobs.size() > 0) {
            LOG.info("Failed jobs:");
            for (ControlledJob controlledJob : failedJobs) {
                LOG.debug("Job: {} ", controlledJob);
            }
        }
    } else {
        List<ControlledJob> failedJobs = jc.getFailedJobList();
        if (failedJobs != null && failedJobs.size() > 0) {
            LOG.info("Failed jobs:");
            for (ControlledJob controlledJob : failedJobs) {
                LOG.warn("Job: {} ", controlledJob);
            }
        }
    }
    this.jc.stop();
}