Example usage for org.apache.hadoop.mapreduce TaskType MAP

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce TaskType.MAP constant, drawn from open-source projects.

Prototype

TaskType MAP
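
A minimal, self-contained sketch of the constant in use is shown below. It is not taken from any of the projects listed on this page; the class name and the "local" job identifier are placeholders. It builds a synthetic map-task id, which is the role TaskType.MAP plays in most of the examples that follow.

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskTypeMapExample {
    public static void main(String[] args) {
        // TaskType.MAP marks the synthetic id as a map task.
        TaskID mapTaskId = new TaskID(new JobID("local", 1), TaskType.MAP, 0);
        System.out.println(mapTaskId + " has type " + mapTaskId.getTaskType());
    }
}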

Usage

From source file:cascading.stats.hadoop.HadoopNodeStats.java

License:Open Source License

/**
 * Retrieves the TaskReports via the mapreduce API.
 *
 * @param kind The kind of TaskReport to retrieve.
 * @return An array of TaskReports, but never <code>null</code>.
 * @throws IOException
 */
private TaskReport[] retrieveTaskReports(HadoopSliceStats.Kind kind) throws IOException, InterruptedException {
    Job job = HadoopStepStats.getJob(getJobStatusClient());

    if (job == null)
        return new TaskReport[0];

    switch (kind) {
    case MAPPER:
        return job.getTaskReports(TaskType.MAP);
    case REDUCER:
        return job.getTaskReports(TaskType.REDUCE);
    case SETUP:
        return job.getTaskReports(TaskType.JOB_SETUP);
    case CLEANUP:
        return job.getTaskReports(TaskType.JOB_CLEANUP);
    default:
        return new TaskReport[0];
    }
}
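
The method above resolves its Job handle through Cascading internals. The standalone sketch below shows the same TaskType.MAP report lookup against a Job obtained from a Cluster; it is not taken from the project above, the job id string is a placeholder, and the Configuration is assumed to carry the cluster settings.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskReport;
import org.apache.hadoop.mapreduce.TaskType;

public class MapTaskReportSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(); // assumes cluster settings are on the classpath
        Cluster cluster = new Cluster(conf);
        Job job = cluster.getJob(JobID.forName("job_1400000000000_0001")); // placeholder job id
        if (job != null) {
            // Fetch reports for map tasks only, as the Cascading method does for Kind.MAPPER.
            for (TaskReport report : job.getTaskReports(TaskType.MAP)) {
                System.out.println("map task progress: " + report.getProgress());
            }
        }
        cluster.close();
    }
}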

From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceMetricsWriter.java

License:Apache License

private void reportMapredStats(Counters jobCounters) throws IOException, InterruptedException {
    JobStatus jobStatus = jobConf.getStatus();
    // map stats
    float mapProgress = jobStatus.getMapProgress();
    int runningMappers = 0;
    int runningReducers = 0;
    for (TaskReport tr : jobConf.getTaskReports(TaskType.MAP)) {
        reportMapTaskMetrics(tr);
        runningMappers += tr.getRunningTaskAttemptIds().size();
    }
    for (TaskReport tr : jobConf.getTaskReports(TaskType.REDUCE)) {
        reportReduceTaskMetrics(tr);
        runningReducers += tr.getRunningTaskAttemptIds().size();
    }
    int memoryPerMapper = jobConf.getConfiguration().getInt(Job.MAP_MEMORY_MB, Job.DEFAULT_MAP_MEMORY_MB);
    int memoryPerReducer = jobConf.getConfiguration().getInt(Job.REDUCE_MEMORY_MB,
            Job.DEFAULT_REDUCE_MEMORY_MB);

    long mapInputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_INPUT_RECORDS);
    long mapOutputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_RECORDS);
    long mapOutputBytes = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_BYTES);

    mapperMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (mapProgress * 100));
    mapperMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, mapInputRecords);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, mapOutputRecords);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_BYTES, mapOutputBytes);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningMappers);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningMappers * memoryPerMapper);

    LOG.trace("Reporting mapper stats: (completion, containers, memory) = ({}, {}, {})",
            (int) (mapProgress * 100), runningMappers, runningMappers * memoryPerMapper);

    // reduce stats
    float reduceProgress = jobStatus.getReduceProgress();
    long reduceInputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_INPUT_RECORDS);
    long reduceOutputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_OUTPUT_RECORDS);

    reducerMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (reduceProgress * 100));
    reducerMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, reduceInputRecords);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, reduceOutputRecords);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningReducers);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningReducers * memoryPerReducer);

    LOG.trace("Reporting reducer stats: (completion, containers, memory) = ({}, {}, {})",
            (int) (reduceProgress * 100), runningReducers, runningReducers * memoryPerReducer);
}

From source file:com.asakusafw.runtime.compatibility.hadoop2.JobCompatibilityHadoop2.java

License:Apache License

@Override
public TaskID newMapTaskId(JobID jobId, int id) {
    if (jobId == null) {
        throw new IllegalArgumentException("jobId must not be null"); //$NON-NLS-1$
    }
    if (TASK_ID_MR2 != null) {
        TaskID result = newTaskIdMr2(jobId, TaskType.MAP, id);
        if (result != null) {
            return result;
        }
    }
    return newTaskIdMr1(jobId, true, id);
}

From source file:com.asakusafw.runtime.compatibility.hadoop2.JobCompatibilityHadoop2.java

License:Apache License

@Override
public Counter getTaskOutputRecordCounter(TaskInputOutputContext<?, ?, ?, ?> context) {
    if (context == null) {
        throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
    }
    if (context.getTaskAttemptID().getTaskType() == TaskType.MAP) {
        return context.getCounter(TaskCounter.MAP_OUTPUT_RECORDS);
    } else {
        return context.getCounter(TaskCounter.REDUCE_OUTPUT_RECORDS);
    }
}

From source file:com.asakusafw.testdriver.file.FileDeployer.java

License:Apache License

/**
 * Opens output for the specified {@link OutputFormat}.
 * @param <V> value type
 * @param definition target model definition
 * @param destination output location
 * @param output the output format
 * @return the opened {@link ModelOutput}
 * @throws IOException if failed to open the target output
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> ModelOutput<V> openOutput(DataModelDefinition<V> definition, final String destination,
        FileOutputFormat<? super NullWritable, ? super V> output) throws IOException {
    assert destination != null;
    assert output != null;
    LOG.debug("Opening {} using {}", destination, output.getClass().getName());
    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(definition.getModelClass());
    final File temporaryDir = File.createTempFile("asakusa", ".tempdir");
    if (temporaryDir.delete() == false || temporaryDir.mkdirs() == false) {
        throw new IOException("Failed to create temporary directory");
    }
    LOG.debug("Using staging deploy target: {}", temporaryDir);
    URI uri = temporaryDir.toURI();
    FileOutputFormat.setOutputPath(job, new Path(uri));
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    FileOutputFormatDriver<V> result = new FileOutputFormatDriver<V>(context, output, NullWritable.get()) {
        @Override
        public void close() throws IOException {
            super.close();
            deploy(destination, temporaryDir);
        }
    };
    return result;
}

From source file:com.asakusafw.testdriver.file.FileExporterRetriever.java

License:Apache License

@Override
public <V> DataModelSource createSource(DataModelDefinition<V> definition, FileExporterDescription description,
        TestContext context) throws IOException {
    LOG.info("??????: {}", description);
    VariableTable variables = createVariables(context);
    checkType(definition, description);/*from  www .j av  a 2s .  c  om*/
    Configuration conf = configurations.newInstance();
    Job job = Job.getInstance(conf);
    String resolved = variables.parse(description.getPathPrefix(), false);
    FileInputFormat.setInputPaths(job, new Path(resolved));
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    FileInputFormat<?, V> format = getOpposite(conf, description.getOutputFormat());
    FileInputFormatDriver<V> result = new FileInputFormatDriver<>(definition, taskContext, format);
    return result;
}
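
Both Asakusa examples above use the same idiom: a TaskAttemptID built with TaskType.MAP lets a TaskAttemptContextImpl drive a FileInputFormat or FileOutputFormat without a running cluster. A condensed sketch of that idiom, reduced to placeholders, might look like the following; the resulting context can then be handed to createRecordReader or getRecordWriter as the two methods above do.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class MapTaskContextSketch {
    public static TaskAttemptContext newMapTaskContext(Configuration conf) {
        // Attempt 0 of map task 0 of an anonymous job: enough to drive an
        // InputFormat or OutputFormat in a test or a local tool.
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0);
        return new TaskAttemptContextImpl(conf, attemptId);
    }
}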

From source file:com.linkedin.drelephant.mapreduce.fetchers.MapReduceFSFetcherHadoop2.java

License:Apache License

@Override
public MapReduceApplicationData fetchData(AnalyticJob job) throws IOException {
    DataFiles files = getHistoryFiles(job);
    String confFile = files.getJobConfPath();
    String histFile = files.getJobHistPath();
    String appId = job.getAppId();
    String jobId = Utils.getJobIdFromApplicationId(appId);

    MapReduceApplicationData jobData = new MapReduceApplicationData();
    jobData.setAppId(appId).setJobId(jobId);

    // Fetch job config
    Configuration jobConf = new Configuration(false);
    jobConf.addResource(_fs.open(new Path(confFile)), confFile);
    Properties jobConfProperties = new Properties();
    for (Map.Entry<String, String> entry : jobConf) {
        jobConfProperties.put(entry.getKey(), entry.getValue());
    }
    jobData.setJobConf(jobConfProperties);

    // Check if job history file is too large and should be throttled
    if (_fs.getFileStatus(new Path(histFile)).getLen() > _maxLogSizeInMB * FileUtils.ONE_MB) {
        String errMsg = "The history log of MapReduce application: " + appId + " is over the limit size of "
                + _maxLogSizeInMB + " MB, the parsing process gets throttled.";
        logger.warn(errMsg);
        jobData.setDiagnosticInfo(errMsg);
        jobData.setSucceeded(false); // set succeeded to false to avoid heuristic analysis
        return jobData;
    }

    // Analyze job history file
    JobHistoryParser parser = new JobHistoryParser(_fs, histFile);
    JobHistoryParser.JobInfo jobInfo = parser.parse();
    IOException parseException = parser.getParseException();
    if (parseException != null) {
        throw new RuntimeException("Could not parse history file " + histFile, parseException);
    }

    jobData.setSubmitTime(jobInfo.getSubmitTime());
    jobData.setStartTime(jobInfo.getLaunchTime());
    jobData.setFinishTime(jobInfo.getFinishTime());

    String state = jobInfo.getJobStatus();
    if (state.equals("SUCCEEDED")) {

        jobData.setSucceeded(true);

        // Fetch job counter
        MapReduceCounterData jobCounter = getCounterData(jobInfo.getTotalCounters());

        // Fetch task data
        Map<TaskID, JobHistoryParser.TaskInfo> allTasks = jobInfo.getAllTasks();
        List<JobHistoryParser.TaskInfo> mapperInfoList = new ArrayList<JobHistoryParser.TaskInfo>();
        List<JobHistoryParser.TaskInfo> reducerInfoList = new ArrayList<JobHistoryParser.TaskInfo>();
        for (JobHistoryParser.TaskInfo taskInfo : allTasks.values()) {
            if (taskInfo.getTaskType() == TaskType.MAP) {
                mapperInfoList.add(taskInfo);
            } else {
                reducerInfoList.add(taskInfo);
            }
        }
        if (jobInfo.getTotalMaps() > MAX_SAMPLE_SIZE) {
            logger.debug(jobId + " total mappers: " + mapperInfoList.size());
        }
        if (jobInfo.getTotalReduces() > MAX_SAMPLE_SIZE) {
            logger.debug(jobId + " total reducers: " + reducerInfoList.size());
        }
        MapReduceTaskData[] mapperList = getTaskData(jobId, mapperInfoList);
        MapReduceTaskData[] reducerList = getTaskData(jobId, reducerInfoList);

        jobData.setCounters(jobCounter).setMapperData(mapperList).setReducerData(reducerList);
    } else if (state.equals("FAILED")) {

        jobData.setSucceeded(false);
        jobData.setDiagnosticInfo(jobInfo.getErrorInfo());
    } else {
        // Should not reach here
        throw new RuntimeException("Job state not supported. Should be either SUCCEEDED or FAILED");
    }

    return jobData;
}

From source file:com.linkedin.drelephant.mapreduce.fetchers.MapReduceFSFetcherHadoop2.java

License:Apache License

private long[] getTaskExecTime(JobHistoryParser.TaskAttemptInfo attempInfo) {
    long startTime = attempInfo.getStartTime();
    long finishTime = attempInfo.getFinishTime();
    boolean isMapper = (attempInfo.getTaskType() == TaskType.MAP);

    long[] time;
    if (isMapper) {
        time = new long[] { finishTime - startTime, 0, 0, startTime, finishTime };
    } else {
        long shuffleFinishTime = attempInfo.getShuffleFinishTime();
        long mergeFinishTime = attempInfo.getSortFinishTime();
        time = new long[] { finishTime - startTime, shuffleFinishTime - startTime,
                mergeFinishTime - shuffleFinishTime, startTime, finishTime };
    }
    return time;
}

From source file:com.marklogic.contentpump.LocalJobRunner.java

License:Apache License

/**
 * Run the job.  Get the input splits, create map tasks and submit them to
 * the thread pool if there is one; otherwise, run the tasks one by
 * one.
 * 
 * @param <INKEY>
 * @param <INVALUE>
 * @param <OUTKEY>
 * @param <OUTVALUE>
 * @throws Exception
 */
@SuppressWarnings("unchecked")
public <INKEY, INVALUE, OUTKEY, OUTVALUE, T extends org.apache.hadoop.mapreduce.InputSplit> void run()
        throws Exception {
    Configuration conf = job.getConfiguration();
    InputFormat<INKEY, INVALUE> inputFormat = (InputFormat<INKEY, INVALUE>) ReflectionUtils
            .newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = inputFormat.getSplits(job);
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);

    // sort the splits into order based on size, so that the biggest
    // goes first
    Arrays.sort(array, new SplitLengthComparator());
    OutputFormat<OUTKEY, OUTVALUE> outputFormat = (OutputFormat<OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(job.getOutputFormatClass(), conf);
    Class<? extends Mapper<?, ?, ?, ?>> mapperClass = job.getMapperClass();
    Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(mapperClass, conf);
    try {
        outputFormat.checkOutputSpecs(job);
    } catch (Exception ex) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error checking output specification: ", ex);
        } else {
            LOG.error("Error checking output specification: ");
            LOG.error(ex.getMessage());
        }
        return;
    }
    conf = job.getConfiguration();
    progress = new AtomicInteger[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        progress[i] = new AtomicInteger();
    }
    Monitor monitor = new Monitor();
    monitor.start();
    reporter = new ContentPumpReporter();
    List<Future<Object>> taskList = new ArrayList<Future<Object>>();
    for (int i = 0; i < array.length; i++) {
        InputSplit split = array[i];
        if (pool != null) {
            LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE> task = new LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE>(
                    inputFormat, outputFormat, conf, i, split, reporter, progress[i]);
            availableThreads = assignThreads(i, array.length);
            Class<? extends Mapper<?, ?, ?, ?>> runtimeMapperClass = job.getMapperClass();
            if (availableThreads > 1 && availableThreads != threadsPerSplit) {
                // possible runtime adjustment
                if (runtimeMapperClass != (Class) MultithreadedMapper.class) {
                    runtimeMapperClass = (Class<? extends Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>>) cmd
                            .getRuntimeMapperClass(job, mapperClass, threadsPerSplit, availableThreads);
                }
                if (runtimeMapperClass != mapperClass) {
                    task.setMapperClass(runtimeMapperClass);
                }
                if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                    task.setThreadCount(availableThreads);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Thread Count for Split#" + i + " : " + availableThreads);
                    }
                }
            }

            if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                synchronized (pool) {
                    taskList.add(pool.submit(task));
                    pool.wait();
                }
            } else {
                pool.submit(task);
            }
        } else { // single-threaded
            JobID jid = new JobID();
            TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, i);
            TaskAttemptID taskAttemptId = new TaskAttemptID(taskId, 0);
            TaskAttemptContext context = ReflectionUtil.createTaskAttemptContext(conf, taskAttemptId);
            RecordReader<INKEY, INVALUE> reader = inputFormat.createRecordReader(split, context);
            RecordWriter<OUTKEY, OUTVALUE> writer = outputFormat.getRecordWriter(context);
            OutputCommitter committer = outputFormat.getOutputCommitter(context);
            TrackingRecordReader trackingReader = new TrackingRecordReader(reader, progress[i]);

            Mapper.Context mapperContext = ReflectionUtil.createMapperContext(mapper, conf, taskAttemptId,
                    trackingReader, writer, committer, reporter, split);

            trackingReader.initialize(split, mapperContext);

            // no thread pool (only 1 thread specified)
            Class<? extends Mapper<?, ?, ?, ?>> mapClass = job.getMapperClass();
            mapperContext.getConfiguration().setClass(CONF_MAPREDUCE_JOB_MAP_CLASS, mapClass, Mapper.class);
            mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils.newInstance(mapClass,
                    mapperContext.getConfiguration());
            mapper.run(mapperContext);
            trackingReader.close();
            writer.close(mapperContext);
            committer.commitTask(context);
        }
    }
    // wait till all tasks are done
    if (pool != null) {
        for (Future<Object> f : taskList) {
            f.get();
        }
        pool.shutdown();
        while (!pool.awaitTermination(1, TimeUnit.DAYS))
            ;
        jobComplete.set(true);
    }
    monitor.interrupt();
    monitor.join(1000);

    // report counters
    Iterator<CounterGroup> groupIt = reporter.counters.iterator();
    while (groupIt.hasNext()) {
        CounterGroup group = groupIt.next();
        LOG.info(group.getDisplayName() + ": ");
        Iterator<Counter> counterIt = group.iterator();
        while (counterIt.hasNext()) {
            Counter counter = counterIt.next();
            LOG.info(counter.getDisplayName() + ": " + counter.getValue());
        }
    }
    LOG.info("Total execution time: " + (System.currentTimeMillis() - startTime) / 1000 + " sec");
}

From source file:com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java

License:Apache License

@Test
public void testMaxHFileSizeSameRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(FijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(!fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}