Example usage for org.apache.hadoop.mapreduce TaskType REDUCE

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.TaskType.REDUCE from open source projects.

Prototype

TaskType REDUCE

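Before the project examples below, here is a minimal sketch of the two most common ways TaskType.REDUCE is used: building task and attempt IDs for the reduce phase, and asking an already submitted Job for its reduce-side TaskReports. The sketch assumes the standard org.apache.hadoop.mapreduce client API; the job identifier, task number, and attempt number are hypothetical.

import java.io.IOException;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskReport;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskTypeReduceSketch {

    /** Builds an attempt ID for the reduce phase, e.g. attempt_201612310001_0042_r_000000_2. */
    static TaskAttemptID newReduceAttemptId() {
        JobID jobId = new JobID("201612310001", 42);                // hypothetical job identifier
        TaskID reduceTask = new TaskID(jobId, TaskType.REDUCE, 0);  // reduce task #0
        return new TaskAttemptID(reduceTask, 2);                    // attempt #2 of that task
    }

    /** Prints the progress of every reduce task of a job that has already been submitted. */
    static void printReduceProgress(Job runningJob) throws IOException, InterruptedException {
        for (TaskReport report : runningJob.getTaskReports(TaskType.REDUCE)) {
            System.out.println(report.getTaskID() + " progress=" + report.getProgress());
        }
    }
}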

Usage

From source file:cascading.stats.hadoop.HadoopNodeStats.java

License:Open Source License

/**
 * Retrieves the TaskReports via the mapreduce API.
 *
 * @param kind The kind of TaskReport to retrieve.
 * @return An array of TaskReports, but never <code>null</code>.
 * @throws IOException
 */
private TaskReport[] retrieveTaskReports(HadoopSliceStats.Kind kind) throws IOException, InterruptedException {
    Job job = HadoopStepStats.getJob(getJobStatusClient());

    if (job == null)
        return new TaskReport[0];

    switch (kind) {
    case MAPPER:
        return job.getTaskReports(TaskType.MAP);
    case REDUCER:
        return job.getTaskReports(TaskType.REDUCE);
    case SETUP:
        return job.getTaskReports(TaskType.JOB_SETUP);
    case CLEANUP:
        return job.getTaskReports(TaskType.JOB_CLEANUP);
    default:
        return new TaskReport[0];
    }
}

From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceMetricsWriter.java

License:Apache License

private void reportMapredStats(Counters jobCounters) throws IOException, InterruptedException {
    JobStatus jobStatus = jobConf.getStatus();
    // map stats
    float mapProgress = jobStatus.getMapProgress();
    int runningMappers = 0;
    int runningReducers = 0;
    for (TaskReport tr : jobConf.getTaskReports(TaskType.MAP)) {
        reportMapTaskMetrics(tr);
        runningMappers += tr.getRunningTaskAttemptIds().size();
    }
    for (TaskReport tr : jobConf.getTaskReports(TaskType.REDUCE)) {
        reportReduceTaskMetrics(tr);
        runningReducers += tr.getRunningTaskAttemptIds().size();
    }
    int memoryPerMapper = jobConf.getConfiguration().getInt(Job.MAP_MEMORY_MB, Job.DEFAULT_MAP_MEMORY_MB);
    int memoryPerReducer = jobConf.getConfiguration().getInt(Job.REDUCE_MEMORY_MB,
            Job.DEFAULT_REDUCE_MEMORY_MB);

    long mapInputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_INPUT_RECORDS);
    long mapOutputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_RECORDS);
    long mapOutputBytes = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_BYTES);

    mapperMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (mapProgress * 100));
    mapperMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, mapInputRecords);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, mapOutputRecords);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_BYTES, mapOutputBytes);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningMappers);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningMappers * memoryPerMapper);

    LOG.trace("Reporting mapper stats: (completion, containers, memory) = ({}, {}, {})",
            (int) (mapProgress * 100), runningMappers, runningMappers * memoryPerMapper);

    // reduce stats
    float reduceProgress = jobStatus.getReduceProgress();
    long reduceInputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_INPUT_RECORDS);
    long reduceOutputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_OUTPUT_RECORDS);

    reducerMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (reduceProgress * 100));
    reducerMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, reduceInputRecords);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, reduceOutputRecords);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningReducers);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningReducers * memoryPerReducer);

    LOG.trace("Reporting reducer stats: (completion, containers, memory) = ({}, {}, {})",
            (int) (reduceProgress * 100), runningReducers, runningReducers * memoryPerReducer);
}

From source file:com.asakusafw.runtime.compatibility.hadoop2.JobCompatibilityHadoop2.java

License:Apache License

@Override
public TaskID newReduceTaskId(JobID jobId, int id) {
    if (jobId == null) {
        throw new IllegalArgumentException("jobId must not be null"); //$NON-NLS-1$
    }
    if (TASK_ID_MR2 != null) {
        TaskID result = newTaskIdMr2(jobId, TaskType.REDUCE, id);
        if (result != null) {
            return result;
        }
    }
    return newTaskIdMr1(jobId, false, id);
}

From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java

License:Open Source License

/**
 * Generate random data, compress it, index and md5 hash the data.
 * Then read it all back and md5 that too, to verify that it all went ok.
 *
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
        throws IOException, NoSuchAlgorithmException, InterruptedException {

    if (!GPLNativeCodeLoader.isNativeCodeLoaded()) {
        LOG.warn("Cannot run this test without the native lzo libraries");
        return;
    }

    Configuration conf = new Configuration();
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    // reducing block size to force a split of the tiny file
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir, true);
    localFs.mkdirs(outputDir);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
        Path lzoFile = new Path(outputDir, lzoFileName);
        LzoTextInputFormat.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir);

    List<InputSplit> is = inputFormat.getSplits(job);
    //verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
        assertEquals(3, is.size());
    } else {
        assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
        RecordReader<LongWritable, Text> rr = inputFormat.createRecordReader(inputSplit, attemptContext);
        rr.initialize(inputSplit, attemptContext);

        while (rr.nextKeyValue()) {
            Text value = rr.getCurrentValue();

            md5.update(value.getBytes(), 0, value.getLength());
        }

        rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5.digest()));
}

From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormatTest.java

License:Apache License

private void mockTaskAttemptContext(String indexType) {
    TaskAttemptID fakeTaskId = new TaskAttemptID(new TaskID("foo_task_" + indexType, 123, TaskType.REDUCE, 2),
            2);
    when(fakeTaskAttemptContext.getTaskAttemptID()).thenReturn(fakeTaskId);
    when(fakeTaskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
}

From source file:com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java

License:Apache License

private static Set<String> runTasks(JobContext job, int numTasks, int numFiles) throws IOException {
    Set<String> uploads = Sets.newHashSet();

    for (int taskId = 0; taskId < numTasks; taskId += 1) {
        TaskAttemptID attemptID = new TaskAttemptID(new TaskID(JOB_ID, TaskType.REDUCE, taskId),
                (taskId * 37) % numTasks);
        TaskAttemptContext attempt = new TaskAttemptContextImpl(new Configuration(job.getConfiguration()),
                attemptID);
        MockedS3Committer taskCommitter = new MockedS3Committer(S3_OUTPUT_PATH, attempt);
        commitTask(taskCommitter, attempt, numFiles);
        uploads.addAll(taskCommitter.results.getUploads());
    }

    return uploads;
}

From source file:com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java

License:Apache License

private static Path writeOutputFile(TaskAttemptID id, Path dest, String content, long copies)
        throws IOException {
    String fileName = ((id.getTaskType() == TaskType.REDUCE) ? "r_" : "m_") + id.getTaskID().getId() + "_"
            + id.getId() + "_" + UUID.randomUUID().toString();
    Path outPath = new Path(dest, fileName);
    FileSystem fs = outPath.getFileSystem(getConfiguration());

    try (OutputStream out = fs.create(outPath)) {
        byte[] bytes = content.getBytes(StandardCharsets.UTF_8);
        for (int i = 0; i < copies; i += 1) {
            out.write(bytes);
        }
    }

    return outPath;
}

From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusChecker.java

License:Apache License

@Override
public void run() {
    // 0. set thread name
    setThreadName();
    if (isInWhitelist(appReport)) {
        LOG.warn("Skipping whitelisted app " + appId);
        return;
    }
    LOG.debug("Running " + Thread.currentThread().getName());
    try {
        try {
            init();
        } catch (Exception e) {
            LOG.error("Skipping app " + appId + " due to initialization error: " + e.getMessage());
            return;
        }
        if (!checkApp())
            return;
        loadClientService();
        checkTasks(TaskType.MAP);
        checkTasks(TaskType.REDUCE);
        clientCache.stopClient(jobId);
    } catch (YarnException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } finally {
        appCheckerProgress.finished();
    }
}

From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java

License:Apache License

@Test
public void testReduceTasks() throws Exception {
    killCounter = 0;
    final String pName = HadoopJobMonitorConfiguration.REDUCE_MAX_RUNTIME_MIN;
    final boolean passCheck = true, killed = true, dryRun = true, enforce = true;
    testTask(TaskType.REDUCE, pName, 5, 10, enforce, !dryRun, TIPStatus.RUNNING, passCheck, !killed);
    testTask(TaskType.REDUCE, pName, 5, 10, 0.01f, enforce, !dryRun, TIPStatus.RUNNING, passCheck, !killed);
    testTask(TaskType.REDUCE, pName, 15, 10, enforce, !dryRun, TIPStatus.FAILED, passCheck, !killed);
    testTask(TaskType.REDUCE, pName, 15, 10, enforce, !dryRun, TIPStatus.RUNNING, !passCheck, killed);
    testTask(TaskType.REDUCE, pName, 15, 10, !enforce, !dryRun, TIPStatus.RUNNING, !passCheck, !killed);
    testTask(TaskType.REDUCE, pName, 15, 10, !enforce, dryRun, TIPStatus.RUNNING, !passCheck, !killed);
    testTask(TaskType.REDUCE, pName, 15, 10, enforce, dryRun, TIPStatus.RUNNING, !passCheck, !killed);
}

From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java

License:Apache License

@Test
public void testReduceProgress() throws Exception {
    testProgress(TaskType.REDUCE, HadoopJobMonitorConfiguration.REDUCE_MAX_RUNTIME_MIN);
}