Example usage for org.apache.hadoop.mapreduce JobID getJtIdentifier

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.JobID.getJtIdentifier().

Prototype

public String getJtIdentifier() 
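
For orientation, here is a minimal sketch (not taken from the listings below) of what the method returns for a JobID built with the two-argument constructor; the identifier string and numeric id are made up purely for illustration.

import org.apache.hadoop.mapreduce.JobID;

public class JobIdDemo {
    public static void main(String[] args) {
        // JobID(String jtIdentifier, int id): the first argument is the
        // "job tracker identifier", typically a timestamp-like string
        // assigned by the cluster when the job is created.
        JobID jobId = new JobID("201504272347", 1);

        // Prints 201504272347 -- the identifier portion by itself.
        System.out.println(jobId.getJtIdentifier());

        // Prints job_201504272347_0001 -- the identifier embedded in the
        // full string form of the job ID.
        System.out.println(jobId);
    }
}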


Usage

From source file:com.marklogic.contentpump.LocalJobRunner.java

License:Apache License

/**
 * Run the job.  Get the input splits, create map tasks, and submit them to
 * the thread pool if there is one; otherwise, run the tasks one by one.
 * 
 * @param <INKEY>
 * @param <INVALUE>
 * @param <OUTKEY>
 * @param <OUTVALUE>
 * @throws Exception
 */
@SuppressWarnings("unchecked")
public <INKEY, INVALUE, OUTKEY, OUTVALUE, T extends org.apache.hadoop.mapreduce.InputSplit> void run()
        throws Exception {
    Configuration conf = job.getConfiguration();
    InputFormat<INKEY, INVALUE> inputFormat = (InputFormat<INKEY, INVALUE>) ReflectionUtils
            .newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = inputFormat.getSplits(job);
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);

    // sort the splits into order based on size, so that the biggest
    // goes first
    Arrays.sort(array, new SplitLengthComparator());
    OutputFormat<OUTKEY, OUTVALUE> outputFormat = (OutputFormat<OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(job.getOutputFormatClass(), conf);
    Class<? extends Mapper<?, ?, ?, ?>> mapperClass = job.getMapperClass();
    Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(mapperClass, conf);
    try {
        outputFormat.checkOutputSpecs(job);
    } catch (Exception ex) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error checking output specification: ", ex);
        } else {
            LOG.error("Error checking output specification: ");
            LOG.error(ex.getMessage());
        }
        return;
    }
    conf = job.getConfiguration();
    progress = new AtomicInteger[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        progress[i] = new AtomicInteger();
    }
    Monitor monitor = new Monitor();
    monitor.start();
    reporter = new ContentPumpReporter();
    List<Future<Object>> taskList = new ArrayList<Future<Object>>();
    for (int i = 0; i < array.length; i++) {
        InputSplit split = array[i];
        if (pool != null) {
            LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE> task = new LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE>(
                    inputFormat, outputFormat, conf, i, split, reporter, progress[i]);
            availableThreads = assignThreads(i, array.length);
            Class<? extends Mapper<?, ?, ?, ?>> runtimeMapperClass = job.getMapperClass();
            if (availableThreads > 1 && availableThreads != threadsPerSplit) {
                // possible runtime adjustment
                if (runtimeMapperClass != (Class) MultithreadedMapper.class) {
                    runtimeMapperClass = (Class<? extends Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>>) cmd
                            .getRuntimeMapperClass(job, mapperClass, threadsPerSplit, availableThreads);
                }
                if (runtimeMapperClass != mapperClass) {
                    task.setMapperClass(runtimeMapperClass);
                }
                if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                    task.setThreadCount(availableThreads);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Thread Count for Split#" + i + " : " + availableThreads);
                    }
                }
            }

            if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                synchronized (pool) {
                    taskList.add(pool.submit(task));
                    pool.wait();
                }
            } else {
                pool.submit(task);
            }
        } else { // single-threaded
            JobID jid = new JobID();
            TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, i);
            TaskAttemptID taskAttemptId = new TaskAttemptID(taskId, 0);
            TaskAttemptContext context = ReflectionUtil.createTaskAttemptContext(conf, taskAttemptId);
            RecordReader<INKEY, INVALUE> reader = inputFormat.createRecordReader(split, context);
            RecordWriter<OUTKEY, OUTVALUE> writer = outputFormat.getRecordWriter(context);
            OutputCommitter committer = outputFormat.getOutputCommitter(context);
            TrackingRecordReader trackingReader = new TrackingRecordReader(reader, progress[i]);

            Mapper.Context mapperContext = ReflectionUtil.createMapperContext(mapper, conf, taskAttemptId,
                    trackingReader, writer, committer, reporter, split);

            trackingReader.initialize(split, mapperContext);

            // no thread pool (only 1 thread specified)
            Class<? extends Mapper<?, ?, ?, ?>> mapClass = job.getMapperClass();
            mapperContext.getConfiguration().setClass(CONF_MAPREDUCE_JOB_MAP_CLASS, mapClass, Mapper.class);
            mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils.newInstance(mapClass,
                    mapperContext.getConfiguration());
            mapper.run(mapperContext);
            trackingReader.close();
            writer.close(mapperContext);
            committer.commitTask(context);
        }
    }
    // wait till all tasks are done
    if (pool != null) {
        for (Future<Object> f : taskList) {
            f.get();
        }
        pool.shutdown();
        while (!pool.awaitTermination(1, TimeUnit.DAYS))
            ;
        jobComplete.set(true);
    }
    monitor.interrupt();
    monitor.join(1000);

    // report counters
    Iterator<CounterGroup> groupIt = reporter.counters.iterator();
    while (groupIt.hasNext()) {
        CounterGroup group = groupIt.next();
        LOG.info(group.getDisplayName() + ": ");
        Iterator<Counter> counterIt = group.iterator();
        while (counterIt.hasNext()) {
            Counter counter = counterIt.next();
            LOG.info(counter.getDisplayName() + ": " + counter.getValue());
        }
    }
    LOG.info("Total execution time: " + (System.currentTimeMillis() - startTime) / 1000 + " sec");
}
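
The call to getJtIdentifier() in this example sits in the single-threaded branch, where a TaskID is assembled from the parts of a freshly created JobID. Below is a stripped-down sketch of just that pattern; the identifier value and task number are invented for illustration.

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskIdFromJobId {
    public static void main(String[] args) {
        // Stand-in for the "new JobID()" created in the single-threaded branch.
        JobID jid = new JobID("local20150427", 0);

        // TaskID(String jtIdentifier, int jobId, TaskType type, int id):
        // the job's identifier and numeric id are carried into the task ID.
        TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, 3);

        // First attempt of that map task.
        TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);

        System.out.println(taskId);    // e.g. task_local20150427_0000_m_000003
        System.out.println(attemptId); // e.g. attempt_local20150427_0000_m_000003_0
    }
}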

From source file:datafu.hourglass.jobs.StagedOutputJob.java

License:Apache License

/**
 * Writes Hadoop counters and other task statistics to a file in the file system.
 *
 * @param fs
 * @throws IOException
 */
private void writeCounters(final FileSystem fs) throws IOException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);

    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");

    String suffix = timestampFormat.format(new Date());

    if (_countersParentPath != null) {
        if (!fs.exists(_countersParentPath)) {
            _log.info("Creating counter parent path " + _countersParentPath);
            fs.mkdirs(_countersParentPath, FsPermission.valueOf("-rwxrwxr-x"));
        }
        // make the name as unique as possible in this case because this may be a directory
        // where other counter files will be dropped
        _countersPath = new Path(_countersParentPath, ".counters." + suffix);
    } else {
        _countersPath = new Path(actualOutputPath, ".counters." + suffix);
    }

    _log.info(String.format("Writing counters to %s", _countersPath));
    FSDataOutputStream counterStream = fs.create(_countersPath);
    BufferedOutputStream buffer = new BufferedOutputStream(counterStream, 256 * 1024);
    OutputStreamWriter writer = new OutputStreamWriter(buffer);
    for (String groupName : getCounters().getGroupNames()) {
        for (Counter counter : getCounters().getGroup(groupName)) {
            writeAndLog(writer, String.format("%s=%d", counter.getName(), counter.getValue()));
        }
    }

    JobID jobID = this.getJobID();

    org.apache.hadoop.mapred.JobID oldJobId = new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(),
            jobID.getId());

    long minStart = Long.MAX_VALUE;
    long maxFinish = 0;
    long setupStart = Long.MAX_VALUE;
    long cleanupFinish = 0;
    DescriptiveStatistics mapStats = new DescriptiveStatistics();
    DescriptiveStatistics reduceStats = new DescriptiveStatistics();
    boolean success = true;

    JobClient jobClient = new JobClient(this.conf);

    Map<String, String> taskIdToType = new HashMap<String, String>();

    TaskReport[] setupReports = jobClient.getSetupTaskReports(oldJobId);
    if (setupReports.length > 0) {
        _log.info("Processing setup reports");
        for (TaskReport report : jobClient.getSetupTaskReports(oldJobId)) {
            taskIdToType.put(report.getTaskID().toString(), "SETUP");
            if (report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start time");
                continue;
            }
            setupStart = Math.min(setupStart, report.getStartTime());
        }
    } else {
        _log.error("No setup reports");
    }

    TaskReport[] mapReports = jobClient.getMapTaskReports(oldJobId);
    if (mapReports.length > 0) {
        _log.info("Processing map reports");
        for (TaskReport report : mapReports) {
            taskIdToType.put(report.getTaskID().toString(), "MAP");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            minStart = Math.min(minStart, report.getStartTime());
            mapStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No map reports");
    }

    TaskReport[] reduceReports = jobClient.getReduceTaskReports(oldJobId);
    if (reduceReports.length > 0) {
        _log.info("Processing reduce reports");
        for (TaskReport report : reduceReports) {
            taskIdToType.put(report.getTaskID().toString(), "REDUCE");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            maxFinish = Math.max(maxFinish, report.getFinishTime());
            reduceStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No reduce reports");
    }

    TaskReport[] cleanupReports = jobClient.getCleanupTaskReports(oldJobId);
    if (cleanupReports.length > 0) {
        _log.info("Processing cleanup reports");
        for (TaskReport report : cleanupReports) {
            taskIdToType.put(report.getTaskID().toString(), "CLEANUP");
            if (report.getFinishTime() == 0) {
                _log.warn("Skipping report with finish time of zero");
                continue;
            }
            cleanupFinish = Math.max(cleanupFinish, report.getFinishTime());
        }
    } else {
        _log.error("No cleanup reports");
    }

    if (minStart == Long.MAX_VALUE) {
        _log.error("Could not determine map-reduce start time");
        success = false;
    }
    if (maxFinish == 0) {
        _log.error("Could not determine map-reduce finish time");
        success = false;
    }

    if (setupStart == Long.MAX_VALUE) {
        _log.error("Could not determine setup start time");
        success = false;
    }
    if (cleanupFinish == 0) {
        _log.error("Could not determine cleanup finish time");
        success = false;
    }

    // Collect statistics on successful/failed/killed task attempts, categorized by setup/map/reduce/cleanup.
    // Unfortunately the job client doesn't have an easier way to get these statistics.
    Map<String, Integer> attemptStats = new HashMap<String, Integer>();
    _log.info("Processing task attempts");
    for (TaskCompletionEvent event : getTaskCompletionEvents(jobClient, oldJobId)) {
        String type = taskIdToType.get(event.getTaskAttemptId().getTaskID().toString());
        String status = event.getTaskStatus().toString();

        String key = String.format("%s_%s_ATTEMPTS", status, type);
        if (!attemptStats.containsKey(key)) {
            attemptStats.put(key, 0);
        }
        attemptStats.put(key, attemptStats.get(key) + 1);
    }

    if (success) {
        writeAndLog(writer, String.format("SETUP_START_TIME_MS=%d", setupStart));
        writeAndLog(writer, String.format("CLEANUP_FINISH_TIME_MS=%d", cleanupFinish));
        writeAndLog(writer, String.format("COMPLETE_WALL_CLOCK_TIME_MS=%d", cleanupFinish - setupStart));

        writeAndLog(writer, String.format("MAP_REDUCE_START_TIME_MS=%d", minStart));
        writeAndLog(writer, String.format("MAP_REDUCE_FINISH_TIME_MS=%d", maxFinish));
        writeAndLog(writer, String.format("MAP_REDUCE_WALL_CLOCK_TIME_MS=%d", maxFinish - minStart));

        writeAndLog(writer, String.format("MAP_TOTAL_TASKS=%d", (long) mapStats.getN()));
        writeAndLog(writer, String.format("MAP_MAX_TIME_MS=%d", (long) mapStats.getMax()));
        writeAndLog(writer, String.format("MAP_MIN_TIME_MS=%d", (long) mapStats.getMin()));
        writeAndLog(writer, String.format("MAP_AVG_TIME_MS=%d", (long) mapStats.getMean()));
        writeAndLog(writer, String.format("MAP_STD_TIME_MS=%d", (long) mapStats.getStandardDeviation()));
        writeAndLog(writer, String.format("MAP_SUM_TIME_MS=%d", (long) mapStats.getSum()));

        writeAndLog(writer, String.format("REDUCE_TOTAL_TASKS=%d", (long) reduceStats.getN()));
        writeAndLog(writer, String.format("REDUCE_MAX_TIME_MS=%d", (long) reduceStats.getMax()));
        writeAndLog(writer, String.format("REDUCE_MIN_TIME_MS=%d", (long) reduceStats.getMin()));
        writeAndLog(writer, String.format("REDUCE_AVG_TIME_MS=%d", (long) reduceStats.getMean()));
        writeAndLog(writer, String.format("REDUCE_STD_TIME_MS=%d", (long) reduceStats.getStandardDeviation()));
        writeAndLog(writer, String.format("REDUCE_SUM_TIME_MS=%d", (long) reduceStats.getSum()));

        writeAndLog(writer, String.format("MAP_REDUCE_SUM_TIME_MS=%d",
                (long) mapStats.getSum() + (long) reduceStats.getSum()));

        for (Map.Entry<String, Integer> attemptStat : attemptStats.entrySet()) {
            writeAndLog(writer, String.format("%s=%d", attemptStat.getKey(), attemptStat.getValue()));
        }
    }

    writer.close();
    buffer.close();
    counterStream.close();
}
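
Here getJtIdentifier() is used to translate the new-API JobID into the old org.apache.hadoop.mapred.JobID that JobClient expects. The sketch below isolates that conversion with a fabricated job ID; the JobID.downgrade call shown alongside it is an equivalent shortcut offered by the old API.

import org.apache.hadoop.mapreduce.JobID;

public class JobIdDowngrade {
    public static void main(String[] args) {
        // In the example the JobID comes from the running job (this.getJobID());
        // here a fabricated one stands in for it.
        JobID jobID = new JobID("201504272347", 42);

        // Rebuild an old-API job ID from the identifier and numeric id, as done
        // before the JobClient calls in writeCounters().
        org.apache.hadoop.mapred.JobID oldJobId =
                new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(), jobID.getId());

        // The old API also exposes the same conversion as a single call.
        org.apache.hadoop.mapred.JobID downgraded =
                org.apache.hadoop.mapred.JobID.downgrade(jobID);

        System.out.println(oldJobId);   // job_201504272347_0042
        System.out.println(downgraded); // job_201504272347_0042
    }
}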

From source file:org.apache.beam.sdk.io.hadoop.format.HDFSSynchronization.java

License:Apache License

private String getJobJtIdentifier(Configuration conf) {
    JobID job = Preconditions.checkNotNull(HadoopFormats.getJobId(conf),
            "Configuration must contain jobID under key %s.", HadoopFormatIO.JOB_ID);
    return job.getJtIdentifier();
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocol.java

License:Apache License

/** {@inheritDoc} */
@Override
public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts)
        throws IOException, InterruptedException {
    try {
        conf.setLong(JOB_SUBMISSION_START_TS_PROPERTY, U.currentTimeMillis());

        GridHadoopJobStatus status = cli.compute().execute(GridHadoopProtocolSubmitJobTask.class.getName(),
                new GridHadoopProtocolTaskArguments(jobId.getJtIdentifier(), jobId.getId(),
                        createJobInfo(conf)));

        assert status != null;

        return processStatus(status);
    } catch (GridClientException | IgniteCheckedException e) {
        throw new IOException("Failed to submit job.", e);
    }
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocol.java

License:Apache License

/** {@inheritDoc} */
@Override
public void killJob(JobID jobId) throws IOException, InterruptedException {
    try {
        cli.compute().execute(GridHadoopProtocolKillJobTask.class.getName(),
                new GridHadoopProtocolTaskArguments(jobId.getJtIdentifier(), jobId.getId()));
    } catch (GridClientException e) {
        throw new IOException("Failed to kill job: " + jobId, e);
    }
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocol.java

License:Apache License

/** {@inheritDoc} */
@Override
public JobStatus getJobStatus(JobID jobId) throws IOException, InterruptedException {
    try {
        Long delay = conf.getLong(GridHadoopJobProperty.JOB_STATUS_POLL_DELAY.propertyName(), -1);

        GridHadoopProtocolTaskArguments args = delay >= 0
                ? new GridHadoopProtocolTaskArguments(jobId.getJtIdentifier(), jobId.getId(), delay)
                : new GridHadoopProtocolTaskArguments(jobId.getJtIdentifier(), jobId.getId());

        GridHadoopJobStatus status = cli.compute().execute(GridHadoopProtocolJobStatusTask.class.getName(),
                args);

        if (status == null)
            throw new IOException("Job tracker doesn't have any information about the job: " + jobId);

        return processStatus(status);
    } catch (GridClientException e) {
        throw new IOException("Failed to get job status: " + jobId, e);
    }
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocol.java

License:Apache License

/** {@inheritDoc} */
@Override
public Counters getJobCounters(JobID jobId) throws IOException, InterruptedException {
    try {
        final GridHadoopCounters counters = cli.compute().execute(
                GridHadoopProtocolJobCountersTask.class.getName(),
                new GridHadoopProtocolTaskArguments(jobId.getJtIdentifier(), jobId.getId()));

        if (counters == null)
            throw new IOException("Job tracker doesn't have any information about the job: " + jobId);

        return new GridHadoopClientCounters(counters);
    } catch (GridClientException e) {
        throw new IOException("Failed to get job counters: " + jobId, e);
    }
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Test next job ID generation.
 *
 * @throws Exception If failed.
 */
@SuppressWarnings("ConstantConditions")
private void tstNextJobId() throws Exception {
    GridHadoopClientProtocolProvider provider = provider();

    ClientProtocol proto = provider.create(config(GridHadoopAbstractSelfTest.REST_PORT));

    JobID jobId = proto.getNewJobID();

    assert jobId != null;
    assert jobId.getJtIdentifier() != null;

    JobID nextJobId = proto.getNewJobID();

    assert nextJobId != null;
    assert nextJobId.getJtIdentifier() != null;

    assert !F.eq(jobId, nextJobId);
}

From source file:org.apache.ignite.client.hadoop.HadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Test next job ID generation.
 *
 * @throws Exception If failed.
 */
@SuppressWarnings("ConstantConditions")
private void tstNextJobId() throws Exception {
    IgniteHadoopClientProtocolProvider provider = provider();

    ClientProtocol proto = provider.create(config(HadoopAbstractSelfTest.REST_PORT));

    JobID jobId = proto.getNewJobID();

    assert jobId != null;
    assert jobId.getJtIdentifier() != null;

    JobID nextJobId = proto.getNewJobID();

    assert nextJobId != null;
    assert nextJobId.getJtIdentifier() != null;

    assert !F.eq(jobId, nextJobId);
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.proto.HadoopClientProtocol.java

License:Apache License

/** {@inheritDoc} */
@Override
public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts)
        throws IOException, InterruptedException {
    try {
        conf.setLong(HadoopCommonUtils.JOB_SUBMISSION_START_TS_PROPERTY, U.currentTimeMillis());

        HadoopJobStatus status = execute(HadoopProtocolSubmitJobTask.class, jobId.getJtIdentifier(),
                jobId.getId(), createJobInfo(conf));

        if (status == null)
            throw new IOException("Failed to submit job (null status obtained): " + jobId);

        return processStatus(status);
    } catch (GridClientException | IgniteCheckedException e) {
        throw new IOException("Failed to submit job.", e);
    }
}