List of usage examples for org.apache.hadoop.mapreduce.Job#getJobID()
public JobID getJobID()
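Job#getJobID() returns the JobID assigned to the job once it has been submitted to the cluster; before submission it may return null, which is why several of the examples below null-check the result before using it. As a minimal sketch of the typical pattern (the class name, job name, and the omitted mapper/reducer and path setup are illustrative assumptions, not taken from any example below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;

public class GetJobIdExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // "example-job" is a hypothetical job name used only for this sketch
        Job job = Job.getInstance(conf, "example-job");
        job.setJarByClass(GetJobIdExample.class);
        // ... mapper/reducer classes and input/output paths would be configured here ...

        job.submit();                    // the JobID is assigned at submission time
        JobID jobId = job.getJobID();    // may be null if the job was never submitted
        if (jobId != null) {
            System.out.println("Submitted job with ID: " + jobId.toString());
        }
        job.waitForCompletion(true);
    }
}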
From source file:kogiri.common.report.Report.java
License:Open Source License
private String makeText(Job job) {
    String jobName = job.getJobName();
    String jobID = job.getJobID().toString();

    String jobStatus;
    try {
        jobStatus = job.getJobState().name();
    } catch (IOException ex) {
        jobStatus = "Unknown";
    } catch (InterruptedException ex) {
        jobStatus = "Unknown";
    }

    String startTimeStr;
    try {
        startTimeStr = TimeHelper.getTimeString(job.getStartTime());
    } catch (Exception ex) {
        startTimeStr = "Unknown";
    }

    String finishTimeStr;
    try {
        finishTimeStr = TimeHelper.getTimeString(job.getFinishTime());
    } catch (Exception ex) {
        finishTimeStr = "Unknown";
    }

    String timeTakenStr;
    try {
        timeTakenStr = TimeHelper.getDiffTimeString(job.getStartTime(), job.getFinishTime());
    } catch (Exception ex) {
        timeTakenStr = "Unknown";
    }

    String countersStr;
    try {
        countersStr = job.getCounters().toString();
    } catch (Exception ex) {
        countersStr = "Unknown";
    }

    return "Job : " + jobName + "\n" +
            "JobID : " + jobID + "\n" +
            "Status : " + jobStatus + "\n" +
            "StartTime : " + startTimeStr + "\n" +
            "FinishTime : " + finishTimeStr + "\n" +
            "TimeTaken : " + timeTakenStr + "\n\n" +
            countersStr;
}
From source file:org.apache.druid.indexer.DeterminePartitionsJob.java
License:Apache License
@Override
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
            throw new ISE(
                    "DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]",
                    config.getPartitionsSpec());
        }
        final SingleDimensionPartitionsSpec partitionsSpec =
                (SingleDimensionPartitionsSpec) config.getPartitionsSpec();

        if (!partitionsSpec.isAssumeGrouped()) {
            groupByJob = Job.getInstance(new Configuration(), StringUtils.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            JobHelper.injectSystemProperties(groupByJob);
            config.addJobProperties(groupByJob);

            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                    JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            // Store the jobId in the file
            if (groupByJob.getJobID() != null) {
                JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), groupByJob.getJobID().toString());
            }

            try {
                if (!groupByJob.waitForCompletion(true)) {
                    log.error("Job failed: %s", groupByJob.getJobID());
                    failureCause = Utils.getFailureMessage(groupByJob, config.JSON_MAPPER);
                    return false;
                }
            } catch (IOException ioe) {
                if (!Utils.checkAppSuccessForJobIOException(ioe, groupByJob,
                        config.isUseYarnRMJobStatusFallback())) {
                    throw ioe;
                }
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = Job.getInstance(new Configuration(), StringUtils.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        JobHelper.injectSystemProperties(dimSelectionJob);
        config.addJobProperties(dimSelectionJob);

        if (!partitionsSpec.isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob,
                DeterminePartitionsDimSelectionPartitioner.class);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                JobHelper.distributedClassPath(config.makeIntermediatePath()), dimSelectionJob);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        // Store the jobId in the file
        if (dimSelectionJob.getJobID() != null) {
            JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(),
                    dimSelectionJob.getJobID().toString());
        }

        try {
            if (!dimSelectionJob.waitForCompletion(true)) {
                log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
                failureCause = Utils.getFailureMessage(dimSelectionJob, config.JSON_MAPPER);
                return false;
            }
        } catch (IOException ioe) {
            if (!Utils.checkAppSuccessForJobIOException(ioe, dimSelectionJob,
                    config.isUseYarnRMJobStatusFallback())) {
                throw ioe;
            }
        }

        /*
         * Load partitions determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>();
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
                List<ShardSpec> specs = config.JSON_MAPPER.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i));
                }

                shardSpecs.put(segmentGranularity.getStartMillis(), actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public static void cleanup(Job job) throws IOException {
    final Path jobDir = getJobPath(job.getJobID(), job.getWorkingDirectory());
    final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
    RuntimeException e = null;
    try {
        JobHelper.deleteWithRetry(fs, jobDir, true);
    } catch (RuntimeException ex) {
        e = ex;
    }
    try {
        JobHelper.deleteWithRetry(fs, getJobClassPathDir(job.getJobName(), job.getWorkingDirectory()), true);
    } catch (RuntimeException ex) {
        if (e == null) {
            e = ex;
        } else {
            e.addSuppressed(ex);
        }
    }
    if (e != null) {
        throw e;
    }
}
From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
From source file:org.apache.druid.indexer.Utils.java
License:Apache License
private static void checkAppSuccessFromYarnRMOnce(HttpClient httpClient, Job job, AtomicBoolean succeeded)
        throws IOException, InterruptedException, ExecutionException, TimeoutException {
    String appId = StringUtils.replace(job.getJobID().toString(), "job", "application");
    String yarnRM = job.getConfiguration().get("yarn.resourcemanager.webapp.address");
    String yarnEndpoint = StringUtils.format("http://%s/ws/v1/cluster/apps/%s", yarnRM, appId);
    log.info("Attempting to retrieve app status from YARN ResourceManager at [%s].", yarnEndpoint);

    ContentResponse res = httpClient.GET(yarnEndpoint);
    log.info("App status response from YARN RM: " + res.getContentAsString());
    Map<String, Object> respMap = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(res.getContentAsString(),
            new TypeReference<Map<String, Object>>() {
            });

    Map<String, Object> appMap = (Map<String, Object>) respMap.get("app");
    String state = (String) appMap.get("state");
    String finalStatus = (String) appMap.get("finalStatus");

    if ("FINISHED".equals(state) && "SUCCEEDED".equals(finalStatus)) {
        succeeded.set(true);
    }
}
From source file:org.apache.falcon.hive.HiveDRTool.java
License:Apache License
public Job execute() throws Exception {
    assert inputOptions != null;
    assert getConf() != null;

    executionStage = inputOptions.getExecutionStage();
    LOG.info("Executing Workflow stage : {}", executionStage);
    if (executionStage.equalsIgnoreCase(HiveDRUtils.ExecutionStage.LASTEVENTS.name())) {
        String lastEventsIdFile = getLastEvents(jobConf);
        LOG.info("Last successfully replicated Event file : {}", lastEventsIdFile);
        return null;
    } else if (executionStage.equalsIgnoreCase(HiveDRUtils.ExecutionStage.EXPORT.name())) {
        createStagingDirectory();
        eventsMetaFile = sourceEvents();
        LOG.info("Sourced Events meta file : {}", eventsMetaFile);
        if (StringUtils.isEmpty(eventsMetaFile)) {
            LOG.info("No events to process");
            return null;
        } else {
            /*
             * eventsMetaFile contains the events to be processed by HiveDr. This file should be available
             * for the import action as well. Persist the file at a location common to both export and import.
             */
            persistEventsMetafileLocation(eventsMetaFile);
        }
    } else if (executionStage.equalsIgnoreCase(HiveDRUtils.ExecutionStage.IMPORT.name())) {
        // read the location of eventsMetaFile from hdfs
        eventsMetaFile = getEventsMetaFileLocation();
        if (StringUtils.isEmpty(eventsMetaFile)) {
            LOG.info("No events to process");
            return null;
        }
    } else {
        throw new HiveReplicationException("Invalid Execution stage : " + inputOptions.getExecutionStage());
    }

    Job job = createJob();
    job.submit();

    String jobID = job.getJobID().toString();
    job.getConfiguration().set("HIVEDR_JOB_ID", jobID);

    LOG.info("HiveDR job-id: {}", jobID);
    if (inputOptions.shouldBlock() && !job.waitForCompletion(true)) {
        throw new IOException(
                "HiveDR failure: Job " + jobID + " has failed: " + job.getStatus().getFailureInfo());
    }

    return job;
}
From source file:org.apache.falcon.hive.util.EventUtils.java
License:Apache License
public void invokeCopy() throws Exception {
    DistCpOptions options = getDistCpOptions();
    DistCp distCp = new DistCp(conf, options);
    LOG.info("Started DistCp with source Path: {} \ttarget path: {}", sourceStagingUri, targetStagingUri);

    Job distcpJob = distCp.execute();
    LOG.info("Distp Hadoop job: {}", distcpJob.getJobID().toString());
    LOG.info("Completed DistCp");
    if (distcpJob.getStatus().getState() == JobStatus.State.SUCCEEDED) {
        countersMap = HiveDRUtils.fetchReplicationCounters(conf, distcpJob);
    }
}
From source file:org.apache.falcon.snapshots.replication.HdfsSnapshotReplicator.java
License:Apache License
protected void invokeCopy(String sourceStorageUrl, String targetStorageUrl, DistributedFileSystem sourceFs,
        DistributedFileSystem targetFs, String sourceDir, String targetDir, String currentSnapshotName)
        throws FalconException {
    try {
        Configuration jobConf = this.getConf();
        DistCpOptions options = getDistCpOptions(sourceStorageUrl, targetStorageUrl, sourceFs, targetFs,
                sourceDir, targetDir, currentSnapshotName);

        DistCp distCp = new DistCp(jobConf, options);
        LOG.info("Started Snapshot based DistCp from {} to {} ", getStagingUri(sourceStorageUrl, sourceDir),
                getStagingUri(targetStorageUrl, targetDir));
        Job distcpJob = distCp.execute();
        LOG.info("Distp Hadoop job: {}", distcpJob.getJobID().toString());
        LOG.info("Completed Snapshot based DistCp");
    } catch (FalconException fe) {
        throw fe;
    } catch (Exception e) {
        throw new FalconException("Unable to replicate HDFS directory using snapshots.", e);
    }
}
From source file:org.apache.giraph.job.HadoopUtils.java
License:Apache License
/**
 * Get Job ID from job.
 * May return null for hadoop 0.20.203
 *
 * @param job submitted job
 * @return JobId for submitted job.
 */
public static JobID getJobID(Job job) {
    /*if[HADOOP_JOB_ID_AVAILABLE]
    return job.getID();
    else[HADOOP_JOB_ID_AVAILABLE]*/
    return job.getJobID();
    /*end[HADOOP_JOB_ID_AVAILABLE]*/
}
From source file:org.apache.hadoop.examples.QuasiMonteCarlo.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(conf);
    //setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        //generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        //start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = Time.monotonicNow();
        job.waitForCompletion(true);
        if (!job.isSuccessful()) {
            System.out.println("Job " + job.getJobID() + " failed!");
            System.exit(1);
        }
        final double duration = (Time.monotonicNow() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        //read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        //compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(numTotal, RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}