Example usage for org.apache.hadoop.mapreduce JobID getJtIdentifier

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.JobID.getJtIdentifier().

Prototype

public String getJtIdentifier() 
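
For orientation, here is a minimal sketch (not taken from the listings below) of what the method returns for a JobID built with the two-argument constructor; the identifier string and numeric id are made up purely for illustration.

import org.apache.hadoop.mapreduce.JobID;

public class JobIdDemo {
    public static void main(String[] args) {
        // JobID(String jtIdentifier, int id): the first argument is the
        // "job tracker identifier", typically a timestamp-like string
        // assigned by the cluster when the job is created.
        JobID jobId = new JobID("201504272347", 1);

        // Prints 201504272347 -- the identifier portion by itself.
        System.out.println(jobId.getJtIdentifier());

        // Prints job_201504272347_0001 -- the identifier embedded in the
        // full string form of the job ID.
        System.out.println(jobId);
    }
}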


Usage

From source file:com.marklogic.contentpump.LocalJobRunner.java

License:Apache License

/**
 * Run the job.  Get the input splits, create map tasks, and submit them to
 * the thread pool if there is one; otherwise, run the tasks one by one.
 * 
 * @param <INKEY>
 * @param <INVALUE>
 * @param <OUTKEY>
 * @param <OUTVALUE>
 * @throws Exception
 */
@SuppressWarnings("unchecked")
public <INKEY, INVALUE, OUTKEY, OUTVALUE, T extends org.apache.hadoop.mapreduce.InputSplit> void run()
        throws Exception {
    Configuration conf = job.getConfiguration();
    InputFormat<INKEY, INVALUE> inputFormat = (InputFormat<INKEY, INVALUE>) ReflectionUtils
            .newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = inputFormat.getSplits(job);
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);

    // sort the splits into order based on size, so that the biggest
    // goes first
    Arrays.sort(array, new SplitLengthComparator());
    OutputFormat<OUTKEY, OUTVALUE> outputFormat = (OutputFormat<OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(job.getOutputFormatClass(), conf);
    Class<? extends Mapper<?, ?, ?, ?>> mapperClass = job.getMapperClass();
    Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(mapperClass, conf);
    try {
        outputFormat.checkOutputSpecs(job);
    } catch (Exception ex) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error checking output specification: ", ex);
        } else {
            LOG.error("Error checking output specification: ");
            LOG.error(ex.getMessage());
        }
        return;
    }
    conf = job.getConfiguration();
    progress = new AtomicInteger[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        progress[i] = new AtomicInteger();
    }
    Monitor monitor = new Monitor();
    monitor.start();
    reporter = new ContentPumpReporter();
    List<Future<Object>> taskList = new ArrayList<Future<Object>>();
    for (int i = 0; i < array.length; i++) {
        InputSplit split = array[i];
        if (pool != null) {
            LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE> task = new LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE>(
                    inputFormat, outputFormat, conf, i, split, reporter, progress[i]);
            availableThreads = assignThreads(i, array.length);
            Class<? extends Mapper<?, ?, ?, ?>> runtimeMapperClass = job.getMapperClass();
            if (availableThreads > 1 && availableThreads != threadsPerSplit) {
                // possible runtime adjustment
                if (runtimeMapperClass != (Class) MultithreadedMapper.class) {
                    runtimeMapperClass = (Class<? extends Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>>) cmd
                            .getRuntimeMapperClass(job, mapperClass, threadsPerSplit, availableThreads);
                }
                if (runtimeMapperClass != mapperClass) {
                    task.setMapperClass(runtimeMapperClass);
                }
                if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                    task.setThreadCount(availableThreads);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Thread Count for Split#" + i + " : " + availableThreads);
                    }
                }
            }

            if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                synchronized (pool) {
                    taskList.add(pool.submit(task));
                    pool.wait();
                }
            } else {
                pool.submit(task);
            }
        } else { // single-threaded
            JobID jid = new JobID();
            TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, i);
            TaskAttemptID taskAttemptId = new TaskAttemptID(taskId, 0);
            TaskAttemptContext context = ReflectionUtil.createTaskAttemptContext(conf, taskAttemptId);
            RecordReader<INKEY, INVALUE> reader = inputFormat.createRecordReader(split, context);
            RecordWriter<OUTKEY, OUTVALUE> writer = outputFormat.getRecordWriter(context);
            OutputCommitter committer = outputFormat.getOutputCommitter(context);
            TrackingRecordReader trackingReader = new TrackingRecordReader(reader, progress[i]);

            Mapper.Context mapperContext = ReflectionUtil.createMapperContext(mapper, conf, taskAttemptId,
                    trackingReader, writer, committer, reporter, split);

            trackingReader.initialize(split, mapperContext);

            // no thread pool (only 1 thread specified)
            Class<? extends Mapper<?, ?, ?, ?>> mapClass = job.getMapperClass();
            mapperContext.getConfiguration().setClass(CONF_MAPREDUCE_JOB_MAP_CLASS, mapClass, Mapper.class);
            mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils.newInstance(mapClass,
                    mapperContext.getConfiguration());
            mapper.run(mapperContext);
            trackingReader.close();
            writer.close(mapperContext);
            committer.commitTask(context);
        }
    }
    // wait till all tasks are done
    if (pool != null) {
        for (Future<Object> f : taskList) {
            f.get();
        }
        pool.shutdown();
        while (!pool.awaitTermination(1, TimeUnit.DAYS))
            ;
        jobComplete.set(true);
    }
    monitor.interrupt();
    monitor.join(1000);

    // report counters
    Iterator<CounterGroup> groupIt = reporter.counters.iterator();
    while (groupIt.hasNext()) {
        CounterGroup group = groupIt.next();
        LOG.info(group.getDisplayName() + ": ");
        Iterator<Counter> counterIt = group.iterator();
        while (counterIt.hasNext()) {
            Counter counter = counterIt.next();
            LOG.info(counter.getDisplayName() + ": " + counter.getValue());
        }
    }
    LOG.info("Total execution time: " + (System.currentTimeMillis() - startTime) / 1000 + " sec");
}
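
The call to getJtIdentifier() in this example sits in the single-threaded branch, where a TaskID is assembled from the parts of a freshly created JobID. Below is a stripped-down sketch of just that pattern; the identifier value and task number are invented for illustration.

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskIdFromJobId {
    public static void main(String[] args) {
        // Stand-in for the "new JobID()" created in the single-threaded branch.
        JobID jid = new JobID("local20150427", 0);

        // TaskID(String jtIdentifier, int jobId, TaskType type, int id):
        // the job's identifier and numeric id are carried into the task ID.
        TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, 3);

        // First attempt of that map task.
        TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);

        System.out.println(taskId);    // e.g. task_local20150427_0000_m_000003
        System.out.println(attemptId); // e.g. attempt_local20150427_0000_m_000003_0
    }
}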

From source file:datafu.hourglass.jobs.StagedOutputJob.java

License:Apache License

/**
 * Writes Hadoop counters and other task statistics to a file in the file system.
 *
 * @param fs
 * @throws IOException
 */
private void writeCounters(final FileSystem fs) throws IOException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);

    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");

    String suffix = timestampFormat.format(new Date());

    if (_countersParentPath != null) {
        if (!fs.exists(_countersParentPath)) {
            _log.info("Creating counter parent path " + _countersParentPath);
            fs.mkdirs(_countersParentPath, FsPermission.valueOf("-rwxrwxr-x"));
        }
        // make the name as unique as possible in this case because this may be a directory
        // where other counter files will be dropped
        _countersPath = new Path(_countersParentPath, ".counters." + suffix);
    } else {
        _countersPath = new Path(actualOutputPath, ".counters." + suffix);
    }

    _log.info(String.format("Writing counters to %s", _countersPath));
    FSDataOutputStream counterStream = fs.create(_countersPath);
    BufferedOutputStream buffer = new BufferedOutputStream(counterStream, 256 * 1024);
    OutputStreamWriter writer = new OutputStreamWriter(buffer);
    for (String groupName : getCounters().getGroupNames()) {
        for (Counter counter : getCounters().getGroup(groupName)) {
            writeAndLog(writer, String.format("%s=%d", counter.getName(), counter.getValue()));
        }
    }

    JobID jobID = this.getJobID();

    org.apache.hadoop.mapred.JobID oldJobId = new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(),
            jobID.getId());

    long minStart = Long.MAX_VALUE;
    long maxFinish = 0;
    long setupStart = Long.MAX_VALUE;
    long cleanupFinish = 0;
    DescriptiveStatistics mapStats = new DescriptiveStatistics();
    DescriptiveStatistics reduceStats = new DescriptiveStatistics();
    boolean success = true;

    JobClient jobClient = new JobClient(this.conf);

    Map<String, String> taskIdToType = new HashMap<String, String>();

    TaskReport[] setupReports = jobClient.getSetupTaskReports(oldJobId);
    if (setupReports.length > 0) {
        _log.info("Processing setup reports");
        for (TaskReport report : jobClient.getSetupTaskReports(oldJobId)) {
            taskIdToType.put(report.getTaskID().toString(), "SETUP");
            if (report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start time");
                continue;
            }
            setupStart = Math.min(setupStart, report.getStartTime());
        }
    } else {
        _log.error("No setup reports");
    }

    TaskReport[] mapReports = jobClient.getMapTaskReports(oldJobId);
    if (mapReports.length > 0) {
        _log.info("Processing map reports");
        for (TaskReport report : mapReports) {
            taskIdToType.put(report.getTaskID().toString(), "MAP");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            minStart = Math.min(minStart, report.getStartTime());
            mapStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No map reports");
    }

    TaskReport[] reduceReports = jobClient.getReduceTaskReports(oldJobId);
    if (reduceReports.length > 0) {
        _log.info("Processing reduce reports");
        for (TaskReport report : reduceReports) {
            taskIdToType.put(report.getTaskID().toString(), "REDUCE");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            maxFinish = Math.max(maxFinish, report.getFinishTime());
            reduceStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No reduce reports");
    }

    TaskReport[] cleanupReports = jobClient.getCleanupTaskReports(oldJobId);
    if (cleanupReports.length > 0) {
        _log.info("Processing cleanup reports");
        for (TaskReport report : cleanupReports) {
            taskIdToType.put(report.getTaskID().toString(), "CLEANUP");
            if (report.getFinishTime() == 0) {
                _log.warn("Skipping report with finish time of zero");
                continue;
            }
            cleanupFinish = Math.max(cleanupFinish, report.getFinishTime());
        }
    } else {
        _log.error("No cleanup reports");
    }

    if (minStart == Long.MAX_VALUE) {
        _log.error("Could not determine map-reduce start time");
        success = false;
    }
    if (maxFinish == 0) {
        _log.error("Could not determine map-reduce finish time");
        success = false;
    }

    if (setupStart == Long.MAX_VALUE) {
        _log.error("Could not determine setup start time");
        success = false;
    }
    if (cleanupFinish == 0) {
        _log.error("Could not determine cleanup finish time");
        success = false;
    }

    // Collect statistics on successful/failed/killed task attempts, categorized by setup/map/reduce/cleanup.
    // Unfortunately the job client doesn't have an easier way to get these statistics.
    Map<String, Integer> attemptStats = new HashMap<String, Integer>();
    _log.info("Processing task attempts");
    for (TaskCompletionEvent event : getTaskCompletionEvents(jobClient, oldJobId)) {
        String type = taskIdToType.get(event.getTaskAttemptId().getTaskID().toString());
        String status = event.getTaskStatus().toString();

        String key = String.format("%s_%s_ATTEMPTS", status, type);
        if (!attemptStats.containsKey(key)) {
            attemptStats.put(key, 0);
        }
        attemptStats.put(key, attemptStats.get(key) + 1);
    }

    if (success) {
        writeAndLog(writer, String.format("SETUP_START_TIME_MS=%d", setupStart));
        writeAndLog(writer, String.format("CLEANUP_FINISH_TIME_MS=%d", cleanupFinish));
        writeAndLog(writer, String.format("COMPLETE_WALL_CLOCK_TIME_MS=%d", cleanupFinish - setupStart));

        writeAndLog(writer, String.format("MAP_REDUCE_START_TIME_MS=%d", minStart));
        writeAndLog(writer, String.format("MAP_REDUCE_FINISH_TIME_MS=%d", maxFinish));
        writeAndLog(writer, String.format("MAP_REDUCE_WALL_CLOCK_TIME_MS=%d", maxFinish - minStart));

        writeAndLog(writer, String.format("MAP_TOTAL_TASKS=%d", (long) mapStats.getN()));
        writeAndLog(writer, String.format("MAP_MAX_TIME_MS=%d", (long) mapStats.getMax()));
        writeAndLog(writer, String.format("MAP_MIN_TIME_MS=%d", (long) mapStats.getMin()));
        writeAndLog(writer, String.format("MAP_AVG_TIME_MS=%d", (long) mapStats.getMean()));
        writeAndLog(writer, String.format("MAP_STD_TIME_MS=%d", (long) mapStats.getStandardDeviation()));
        writeAndLog(writer, String.format("MAP_SUM_TIME_MS=%d", (long) mapStats.getSum()));

        writeAndLog(writer, String.format("REDUCE_TOTAL_TASKS=%d", (long) reduceStats.getN()));
        writeAndLog(writer, String.format("REDUCE_MAX_TIME_MS=%d", (long) reduceStats.getMax()));
        writeAndLog(writer, String.format("REDUCE_MIN_TIME_MS=%d", (long) reduceStats.getMin()));
        writeAndLog(writer, String.format("REDUCE_AVG_TIME_MS=%d", (long) reduceStats.getMean()));
        writeAndLog(writer, String.format("REDUCE_STD_TIME_MS=%d", (long) reduceStats.getStandardDeviation()));
        writeAndLog(writer, String.format("REDUCE_SUM_TIME_MS=%d", (long) reduceStats.getSum()));

        writeAndLog(writer, String.format("MAP_REDUCE_SUM_TIME_MS=%d",
                (long) mapStats.getSum() + (long) reduceStats.getSum()));

        for (Map.Entry<String, Integer> attemptStat : attemptStats.entrySet()) {
            writeAndLog(writer, String.format("%s=%d", attemptStat.getKey(), attemptStat.getValue()));
        }
    }

    writer.close();
    buffer.close();
    counterStream.close();
}
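
Here getJtIdentifier() is used to translate the new-API JobID into the old org.apache.hadoop.mapred.JobID that JobClient expects. The sketch below isolates that conversion with a fabricated job ID; the JobID.downgrade call shown alongside it is an equivalent shortcut offered by the old API.

import org.apache.hadoop.mapreduce.JobID;

public class JobIdDowngrade {
    public static void main(String[] args) {
        // In the example the JobID comes from the running job (this.getJobID());
        // here a fabricated one stands in for it.
        JobID jobID = new JobID("201504272347", 42);

        // Rebuild an old-API job ID from the identifier and numeric id, as done
        // before the JobClient calls in writeCounters().
        org.apache.hadoop.mapred.JobID oldJobId =
                new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(), jobID.getId());

        // The old API also exposes the same conversion as a single call.
        org.apache.hadoop.mapred.JobID downgraded =
                org.apache.hadoop.mapred.JobID.downgrade(jobID);

        System.out.println(oldJobId);   // job_201504272347_0042
        System.out.println(downgraded); // job_201504272347_0042
    }
}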

From source file:org.apache.beam.sdk.io.hadoop.format.HDFSSynchronization.java

License:Apache License

private String getJobJtIdentifier(Configuration conf) {
    JobID job = Preconditions.checkNotNull(HadoopFormats.getJobId(conf),
            "Configuration must contain jobID under key %s.", HadoopFormatIO.JOB_ID);
    return job.getJtIdentifier();
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocol.java

License:Apache License

/** {@inheritDoc} */
@Override
public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts)
        throws IOException, InterruptedException {
    try {
        conf.setLong(JOB_SUBMISSION_START_TS_PROPERTY, U.currentTimeMillis());

        GridHadoopJobStatus status = cli.compute().execute(GridHadoopProtocolSubmitJobTask.class.getName(),
                new GridHadoopProtocolTaskArguments(jobId.getJtIdentifier(), jobId.getId(),
                        createJobInfo(conf)));

        assert status != null;

        return processStatus(status);
    } catch (GridClientException | IgniteCheckedException e) {
        throw new IOException("Failed to submit job.", e);
    }
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocol.java

License:Apache License

/** {@inheritDoc} */
@Override
public void killJob(JobID jobId) throws IOException, InterruptedException {
    try {
        cli.compute().execute(GridHadoopProtocolKillJobTask.class.getName(),
                new GridHadoopProtocolTaskArguments(jobId.getJtIdentifier(), jobId.getId()));
    } catch (GridClientException e) {
        throw new IOException("Failed to kill job: " + jobId, e);
    }
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocol.java

License:Apache License

/** {@inheritDoc} */
@Override
public JobStatus getJobStatus(JobID jobId) throws IOException, InterruptedException {
    try {
        Long delay = conf.getLong(GridHadoopJobProperty.JOB_STATUS_POLL_DELAY.propertyName(), -1);

        GridHadoopProtocolTaskArguments args = delay >= 0
                ? new GridHadoopProtocolTaskArguments(jobId.getJtIdentifier(), jobId.getId(), delay)
                : new GridHadoopProtocolTaskArguments(jobId.getJtIdentifier(), jobId.getId());

        GridHadoopJobStatus status = cli.compute().execute(GridHadoopProtocolJobStatusTask.class.getName(),
                args);

        if (status == null)
            throw new IOException("Job tracker doesn't have any information about the job: " + jobId);

        return processStatus(status);
    } catch (GridClientException e) {
        throw new IOException("Failed to get job status: " + jobId, e);
    }
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocol.java

License:Apache License

/** {@inheritDoc} */
@Override
public Counters getJobCounters(JobID jobId) throws IOException, InterruptedException {
    try {
        final GridHadoopCounters counters = cli.compute().execute(
                GridHadoopProtocolJobCountersTask.class.getName(),
                new GridHadoopProtocolTaskArguments(jobId.getJtIdentifier(), jobId.getId()));

        if (counters == null)
            throw new IOException("Job tracker doesn't have any information about the job: " + jobId);

        return new GridHadoopClientCounters(counters);
    } catch (GridClientException e) {
        throw new IOException("Failed to get job counters: " + jobId, e);
    }
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Test next job ID generation.
 *
 * @throws Exception If failed.
 */
@SuppressWarnings("ConstantConditions")
private void tstNextJobId() throws Exception {
    GridHadoopClientProtocolProvider provider = provider();

    ClientProtocol proto = provider.create(config(GridHadoopAbstractSelfTest.REST_PORT));

    JobID jobId = proto.getNewJobID();

    assert jobId != null;
    assert jobId.getJtIdentifier() != null;

    JobID nextJobId = proto.getNewJobID();

    assert nextJobId != null;
    assert nextJobId.getJtIdentifier() != null;

    assert !F.eq(jobId, nextJobId);
}

From source file:org.apache.ignite.client.hadoop.HadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Test next job ID generation.
 *
 * @throws Exception If failed.
 */
@SuppressWarnings("ConstantConditions")
private void tstNextJobId() throws Exception {
    IgniteHadoopClientProtocolProvider provider = provider();

    ClientProtocol proto = provider.create(config(HadoopAbstractSelfTest.REST_PORT));

    JobID jobId = proto.getNewJobID();

    assert jobId != null;
    assert jobId.getJtIdentifier() != null;

    JobID nextJobId = proto.getNewJobID();

    assert nextJobId != null;
    assert nextJobId.getJtIdentifier() != null;

    assert !F.eq(jobId, nextJobId);
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.proto.HadoopClientProtocol.java

License:Apache License

/** {@inheritDoc} */
@Override
public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts)
        throws IOException, InterruptedException {
    try {
        conf.setLong(HadoopCommonUtils.JOB_SUBMISSION_START_TS_PROPERTY, U.currentTimeMillis());

        HadoopJobStatus status = execute(HadoopProtocolSubmitJobTask.class, jobId.getJtIdentifier(),
                jobId.getId(), createJobInfo(conf));

        if (status == null)
            throw new IOException("Failed to submit job (null status obtained): " + jobId);

        return processStatus(status);
    } catch (GridClientException | IgniteCheckedException e) {
        throw new IOException("Failed to submit job.", e);
    }
}