List of usage examples for org.apache.hadoop.mapreduce Job getTrackingURL
public String getTrackingURL()
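In every example below, getTrackingURL() is called only after the job has been submitted (directly via job.submit() or implicitly via waitForCompletion), typically to log or report the URL of the job's tracking page. A minimal sketch of that pattern is shown here; the class name, job name, and omitted mapper/reducer/path setup are placeholders, not part of any of the source files listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class TrackingUrlExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Placeholder job; real code would also set the input/output formats,
        // mapper, reducer, and input/output paths before submitting.
        Job job = Job.getInstance(conf, "tracking-url-example");
        job.setJarByClass(TrackingUrlExample.class);

        // The tracking URL is only meaningful once the job has been submitted.
        job.submit();
        System.out.println("Job " + job.getJobID() + " may be tracked at: " + job.getTrackingURL());

        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);
    }
}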
From source file:cascading.stats.hadoop.HadoopStepStats.java
License:Open Source License
public String getStatusURL() {
    Job runningJob = getJob(getJobStatusClient());

    if (runningJob == null)
        return null;

    return runningJob.getTrackingURL();
}
From source file:com.inmobi.conduit.distcp.tools.DistCp.java
License:Apache License
/**
 * Implements the core-execution. Creates the file-list for copy,
 * and launches the Hadoop-job, to do the copy.
 * @return Job handle
 * @throws Exception, on failure.
 */
public Job execute() throws Exception {
    assert inputOptions != null;
    assert getConf() != null;

    Job job = null;
    try {
        metaFolder = createMetaFolderPath();
        jobFS = metaFolder.getFileSystem(getConf());

        job = createJob();
        createInputFileListing(job);

        job.submit();
        submitted = true;
    } finally {
        if (!submitted) {
            cleanup();
        }
    }

    String jobID = getJobID(job);
    job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID);

    LOG.info("DistCp job-id: " + jobID);
    LOG.info("DistCp job may be tracked at: " + job.getTrackingURL());
    LOG.info("To cancel, run the following command:\thadoop job -kill " + jobID);

    long jobStartTime = System.nanoTime();
    if (inputOptions.shouldBlock() && !job.waitForCompletion(true)) {
        updateJobTimeInNanos(jobStartTime);
        throw new IOException("DistCp failure: Job " + jobID + " has failed. ");
    }
    updateJobTimeInNanos(jobStartTime);
    return job;
}
From source file:com.metamx.druid.indexer.DeterminePartitionsJob.java
License:Open Source License
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            final Job groupByJob = new Job(new Configuration(), String.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            injectSystemProperties(groupByJob);
            groupByJob.setInputFormatClass(TextInputFormat.class);
            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            groupByJob.setJarByClass(DeterminePartitionsJob.class);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                return false;
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = new Job(new Configuration(), String.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        injectSystemProperties(dimSelectionJob);

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            dimSelectionJob.setInputFormatClass(TextInputFormat.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setJarByClass(DeterminePartitionsJob.class);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        if (!dimSelectionJob.waitForCompletion(true)) {
            log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
            return false;
        }

        /*
         * Load partitions determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
            DateTime bucket = segmentGranularity.getStart();

            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (fileSystem.exists(partitionInfoPath)) {
                List<ShardSpec> specs = config.jsonMapper.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                }

                shardSpecs.put(bucket, actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.metamx.druid.indexer.IndexGeneratorJob.java
License:Open Source License
public boolean run() {
    try {
        Job job = new Job(new Configuration(),
                String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()));

        job.getConfiguration().set("io.sort.record.percent", "0.23");

        for (String propName : System.getProperties().stringPropertyNames()) {
            Configuration conf = job.getConfiguration();
            if (propName.startsWith("hadoop.")) {
                conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
            }
        }

        job.setInputFormatClass(TextInputFormat.class);

        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(Text.class);

        SortableBytes.useSortableBytesAsMapOutputKey(job);

        job.setNumReduceTasks(Iterables.size(config.getAllBuckets()));
        job.setPartitionerClass(IndexGeneratorPartitioner.class);

        job.setReducerClass(IndexGeneratorReducer.class);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);
        config.intoConfiguration(job);

        job.setJarByClass(IndexGeneratorJob.class);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        boolean success = job.waitForCompletion(true);

        Counter invalidRowCount = job.getCounters()
                .findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRowCount.getValue());

        return success;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:com.netflix.Aegisthus.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());

    job.setJarByClass(Aegisthus.class);
    CommandLine cl = getOptions(args);
    if (cl == null) {
        return 1;
    }
    job.setInputFormatClass(AegisthusInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(CassReducer.class);
    List<Path> paths = Lists.newArrayList();
    if (cl.hasOption(OPT_INPUT)) {
        for (String input : cl.getOptionValues(OPT_INPUT)) {
            paths.add(new Path(input));
        }
    }
    if (cl.hasOption(OPT_INPUTDIR)) {
        paths.addAll(getDataFiles(job.getConfiguration(), cl.getOptionValue(OPT_INPUTDIR)));
    }
    TextInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
    TextOutputFormat.setOutputPath(job, new Path(cl.getOptionValue(OPT_OUTPUT)));

    job.submit();
    System.out.println(job.getJobID());
    System.out.println(job.getTrackingURL());
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.ngdata.hbaseindexer.mr.HBaseMapReduceIndexerTool.java
License:Apache License
public int run(HBaseIndexingOptions hbaseIndexingOpts, JobProcessCallback callback) throws Exception {
    if (hbaseIndexingOpts.isDryRun) {
        return new IndexerDryRun(hbaseIndexingOpts, getConf(), System.out).run();
    }

    long programStartTime = System.currentTimeMillis();
    Configuration conf = getConf();

    IndexingSpecification indexingSpec = hbaseIndexingOpts.getIndexingSpecification();

    conf.set(HBaseIndexerMapper.INDEX_COMPONENT_FACTORY_KEY, indexingSpec.getIndexerComponentFactory());
    conf.set(HBaseIndexerMapper.INDEX_CONFIGURATION_CONF_KEY,
            new String(indexingSpec.getConfiguration(), Charsets.UTF_8));
    conf.set(HBaseIndexerMapper.INDEX_NAME_CONF_KEY, indexingSpec.getIndexerName());
    conf.set(HBaseIndexerMapper.TABLE_NAME_CONF_KEY, indexingSpec.getTableName());
    HBaseIndexerMapper.configureIndexConnectionParams(conf, indexingSpec.getIndexConnectionParams());

    IndexerComponentFactory factory = IndexerComponentFactoryUtil.getComponentFactory(
            indexingSpec.getIndexerComponentFactory(),
            new ByteArrayInputStream(indexingSpec.getConfiguration()),
            indexingSpec.getIndexConnectionParams());
    IndexerConf indexerConf = factory.createIndexerConf();

    Map<String, String> params = indexerConf.getGlobalParams();
    String morphlineFile = params.get(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM);
    if (hbaseIndexingOpts.morphlineFile != null) {
        morphlineFile = hbaseIndexingOpts.morphlineFile.getPath();
    }
    if (morphlineFile != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM, new File(morphlineFile).getName());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(new File(morphlineFile), conf);
    }

    String morphlineId = params.get(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM);
    if (hbaseIndexingOpts.morphlineId != null) {
        morphlineId = hbaseIndexingOpts.morphlineId;
    }
    if (morphlineId != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM, morphlineId);
    }

    conf.setBoolean(HBaseIndexerMapper.INDEX_DIRECT_WRITE_CONF_KEY, hbaseIndexingOpts.isDirectWrite());

    if (hbaseIndexingOpts.fairSchedulerPool != null) {
        conf.set("mapred.fairscheduler.pool", hbaseIndexingOpts.fairSchedulerPool);
    }

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (hbaseIndexingOpts.log4jConfigFile != null) {
        Utils.setLogConfigFile(hbaseIndexingOpts.log4jConfigFile, getConf());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(hbaseIndexingOpts.log4jConfigFile, conf);
    }

    Job job = Job.getInstance(getConf());
    job.setJobName(getClass().getSimpleName() + "/" + HBaseIndexerMapper.class.getSimpleName());
    job.setJarByClass(HBaseIndexerMapper.class);
    // job.setUserClassesTakesPrecedence(true);

    TableMapReduceUtil.initTableMapperJob(hbaseIndexingOpts.getScans(), HBaseIndexerMapper.class, Text.class,
            SolrInputDocumentWritable.class, job);

    // explicitely set hbase configuration on the job because the TableMapReduceUtil overwrites it with the hbase defaults
    // (see HBASE-4297 which is not really fixed in hbase 0.94.6 on all code paths)
    HBaseConfiguration.merge(job.getConfiguration(), getConf());

    int mappers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxMapTasks(); // MR1
    //mappers = job.getCluster().getClusterStatus().getMapSlotCapacity(); // Yarn only
    LOG.info("Cluster reports {} mapper slots", mappers);

    LOG.info("Using these parameters: " + "reducers: {}, shards: {}, fanout: {}, maxSegments: {}",
            new Object[] { hbaseIndexingOpts.reducers, hbaseIndexingOpts.shards, hbaseIndexingOpts.fanout,
                    hbaseIndexingOpts.maxSegments });

    if (hbaseIndexingOpts.isDirectWrite()) {
        CloudSolrServer solrServer = new CloudSolrServer(hbaseIndexingOpts.zkHost);
        solrServer.setDefaultCollection(hbaseIndexingOpts.collection);

        if (hbaseIndexingOpts.clearIndex) {
            clearSolr(indexingSpec.getIndexConnectionParams());
        }

        // Run a mapper-only MR job that sends index documents directly to a live Solr instance.
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setNumReduceTasks(0);
        job.submit();
        callback.jobStarted(job.getJobID().toString(), job.getTrackingURL());
        if (!ForkedMapReduceIndexerTool.waitForCompletion(job, hbaseIndexingOpts.isVerbose)) {
            return -1; // job failed
        }
        commitSolr(indexingSpec.getIndexConnectionParams());
        ForkedMapReduceIndexerTool.goodbye(job, programStartTime);
        return 0;
    } else {
        FileSystem fileSystem = FileSystem.get(getConf());

        if (fileSystem.exists(hbaseIndexingOpts.outputDir)) {
            if (hbaseIndexingOpts.overwriteOutputDir) {
                LOG.info("Removing existing output directory {}", hbaseIndexingOpts.outputDir);
                if (!fileSystem.delete(hbaseIndexingOpts.outputDir, true)) {
                    LOG.error("Deleting output directory '{}' failed", hbaseIndexingOpts.outputDir);
                    return -1;
                }
            } else {
                LOG.error("Output directory '{}' already exists. Run with --overwrite-output-dir to "
                        + "overwrite it, or remove it manually", hbaseIndexingOpts.outputDir);
                return -1;
            }
        }

        int exitCode = ForkedMapReduceIndexerTool.runIndexingPipeline(job, callback, getConf(),
                hbaseIndexingOpts.asOptions(), programStartTime, fileSystem, null, -1, // File-based parameters
                -1, // num mappers, only of importance for file-based indexing
                hbaseIndexingOpts.reducers);

        if (hbaseIndexingOpts.isGeneratedOutputDir()) {
            LOG.info("Deleting generated output directory " + hbaseIndexingOpts.outputDir);
            fileSystem.delete(hbaseIndexingOpts.outputDir, true);
        }
        return exitCode;
    }
}
From source file:com.streamsets.pipeline.stage.destination.mapreduce.MapReduceExecutor.java
License:Apache License
@Override
public void write(Batch batch) throws StageException {
    EvalContext eval = new EvalContext(getContext());

    Iterator<Record> it = batch.getRecords();
    while (it.hasNext()) {
        final Record record = it.next();
        eval.setRecord(record);

        Job job = null;
        try {
            // Job configuration object is a clone of the original one that we're keeping in mapReduceConfig class
            final Configuration jobConfiguration = new Configuration(mapReduceConfig.getConfiguration());

            // Evaluate all dynamic properties and store them in the configuration job
            for (Map.Entry<String, String> entry : jobConfig.jobConfigs.entrySet()) {
                String key = eval.evaluateToString("jobConfigs", entry.getKey(), true);
                String value = eval.evaluateToString("jobConfigs", entry.getValue(), false);

                jobConfiguration.set(key, value);
            }

            // For build-in job creators, evaluate their properties and persist them in the MR config
            switch (jobConfig.jobType) {
            case AVRO_PARQUET:
                jobConfiguration.set(AvroConversionCommonConstants.INPUT_FILE,
                        eval.evaluateToString("inputFile", jobConfig.avroConversionCommonConfig.inputFile, true));
                jobConfiguration.set(AvroConversionCommonConstants.OUTPUT_DIR, eval.evaluateToString(
                        "outputDirectory", jobConfig.avroConversionCommonConfig.outputDirectory, true));
                jobConfiguration.setBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE,
                        jobConfig.avroConversionCommonConfig.keepInputFile);
                jobConfiguration.set(AvroParquetConstants.COMPRESSION_CODEC_NAME, eval.evaluateToString(
                        "compressionCodec", jobConfig.avroParquetConfig.compressionCodec, false));
                jobConfiguration.setInt(AvroParquetConstants.ROW_GROUP_SIZE,
                        jobConfig.avroParquetConfig.rowGroupSize);
                jobConfiguration.setInt(AvroParquetConstants.PAGE_SIZE, jobConfig.avroParquetConfig.pageSize);
                jobConfiguration.setInt(AvroParquetConstants.DICTIONARY_PAGE_SIZE,
                        jobConfig.avroParquetConfig.dictionaryPageSize);
                jobConfiguration.setInt(AvroParquetConstants.MAX_PADDING_SIZE,
                        jobConfig.avroParquetConfig.maxPaddingSize);
                jobConfiguration.setBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE,
                        jobConfig.avroConversionCommonConfig.overwriteTmpFile);
                break;
            case AVRO_ORC:
                jobConfiguration.set(AvroConversionCommonConstants.INPUT_FILE,
                        eval.evaluateToString("inputFile", jobConfig.avroConversionCommonConfig.inputFile, true));
                jobConfiguration.set(AvroConversionCommonConstants.OUTPUT_DIR, eval.evaluateToString(
                        "outputDirectory", jobConfig.avroConversionCommonConfig.outputDirectory, true));
                jobConfiguration.setBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE,
                        jobConfig.avroConversionCommonConfig.keepInputFile);
                jobConfiguration.setBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE,
                        jobConfig.avroConversionCommonConfig.overwriteTmpFile);
                jobConfiguration.setInt(AvroOrcConstants.ORC_BATCH_SIZE, jobConfig.avroOrcConfig.orcBatchSize);
                break;
            case CUSTOM:
                // Nothing because custom is generic one that have no special config properties
                break;
            default:
                throw new UnsupportedOperationException("Unsupported JobType: " + jobConfig.jobType);
            }

            job = createAndSubmitJob(jobConfiguration);
        } catch (IOException | InterruptedException | ELEvalException e) {
            LOG.error("Can't submit mapreduce job", e);
            errorRecordHandler.onError(
                    new OnRecordErrorException(record, MapReduceErrors.MAPREDUCE_0005, e.getMessage(), e));
        }

        if (job != null) {
            MapReduceExecutorEvents.JOB_CREATED.create(getContext())
                    .with("tracking-url", job.getTrackingURL())
                    .with("job-id", job.getJobID().toString())
                    .createAndSend();
        }
    }
}
From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java
License:Open Source License
/**
 * Visualizes a dataset.
 * @param request
 * @param response
 */
private void handleVisualize(HttpServletRequest request, HttpServletResponse response) {
    try {
        String pathStr = request.getParameter("path");
        final Path path = new Path(pathStr);
        FileSystem fs = path.getFileSystem(commonParams);
        // Check if the input is already visualized
        final Path imagePath = new Path(path, "_data.png");
        if (fs.exists(imagePath)) {
            // Image is already visualized
            response.setStatus(HttpServletResponse.SC_MOVED_PERMANENTLY);
            response.setHeader("Location", "/hdfs" + imagePath);
        } else {
            // This dataset has never been visualized before
            String shapeName = request.getParameter("shape");
            final OperationsParams vizParams = new OperationsParams(commonParams);
            vizParams.set("shape", shapeName);
            vizParams.setBoolean("background", true);
            vizParams.setInt("width", 2000);
            vizParams.setInt("height", 2000);

            // Retrieve the owner of the data directory
            String owner = fs.getFileStatus(path).getOwner();
            UserGroupInformation ugi = UserGroupInformation.createRemoteUser(owner);
            Job vizJob = ugi.doAs(new PrivilegedExceptionAction<Job>() {
                public Job run() throws Exception {
                    return GeometricPlot.plot(new Path[] { path }, imagePath, vizParams);
                }
            });

            // Write the response
            response.setStatus(HttpServletResponse.SC_OK);
            response.setContentType("application/json;charset=utf-8");
            PrintWriter out = response.getWriter();
            out.printf("{\"JobID\":\"%s\", \"TrackURL\": \"%s\"}", vizJob.getJobID().toString(),
                    vizJob.getTrackingURL());
            out.close();
        }
    } catch (Exception e) {
        System.out.println("error happened");
        e.printStackTrace();
        try {
            e.printStackTrace(response.getWriter());
        } catch (IOException ioe) {
            ioe.printStackTrace();
            e.printStackTrace();
        }
        response.setContentType("text/plain;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
    }
}
From source file:gobblin.compaction.mapreduce.MRCompactorJobRunner.java
License:Apache License
private void submitAndWait(Job job) throws ClassNotFoundException, IOException, InterruptedException {
    job.submit();
    MRCompactor.addRunningHadoopJob(this.dataset, job);
    LOG.info(String.format("MR job submitted for dataset %s, input %s, url: %s", this.dataset, getInputPaths(),
            job.getTrackingURL()));
    while (!job.isComplete()) {
        if (this.policy == Policy.ABORT_ASAP) {
            LOG.info(String.format("MR job for dataset %s, input %s killed due to input data incompleteness."
                    + " Will try again later", this.dataset, getInputPaths()));
            job.killJob();
            return;
        }
        Thread.sleep(MR_JOB_CHECK_COMPLETE_INTERVAL_MS);
    }
    if (!job.isSuccessful()) {
        throw new RuntimeException(String.format("MR job failed for topic %s, input %s, url: %s", this.dataset,
                getInputPaths(), job.getTrackingURL()));
    }
}
From source file:gobblin.runtime.mapreduce.MRTask.java
License:Apache License
@Override
public void run() {
    try {
        Job job = createJob();
        job.submit();
        this.eventSubmitter.submit(Events.MR_JOB_STARTED_EVENT, Events.JOB_URL, job.getTrackingURL());
        job.waitForCompletion(false);

        if (job.isSuccessful()) {
            this.eventSubmitter.submit(Events.MR_JOB_SUCCESSFUL, Events.JOB_URL, job.getTrackingURL());
            this.workingState = WorkUnitState.WorkingState.SUCCESSFUL;
        } else {
            this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.JOB_URL, job.getTrackingURL());
            this.workingState = WorkUnitState.WorkingState.FAILED;
        }
    } catch (Throwable t) {
        log.error("Failed to run MR job.", t);
        this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.FAILURE_CONTEXT, t.getMessage());
        this.workingState = WorkUnitState.WorkingState.FAILED;
    }
}