List of usage examples for org.apache.hadoop.mapreduce TaskType REDUCE

TaskType.REDUCE is the enum constant the MapReduce API uses to identify reduce tasks. The examples below, drawn from open-source projects, use it to fetch task reports, construct task and attempt ids, and drive tests.
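For orientation, here is a minimal, self-contained sketch (the class name is illustrative) that prints every TaskType constant; REDUCE sits alongside MAP, JOB_SETUP, JOB_CLEANUP, and TASK_CLEANUP:

import org.apache.hadoop.mapreduce.TaskType;

public class TaskTypeValues {
    public static void main(String[] args) {
        // Prints: MAP, REDUCE, JOB_SETUP, JOB_CLEANUP, TASK_CLEANUP
        for (TaskType type : TaskType.values()) {
            System.out.println(type.name());
        }
    }
}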
From source file:cascading.stats.hadoop.HadoopNodeStats.java
License:Open Source License
/**
 * Retrieves the TaskReports via the mapreduce API.
 *
 * @param kind The kind of TaskReport to retrieve.
 * @return An array of TaskReports, but never <code>null</code>.
 * @throws IOException
 */
private TaskReport[] retrieveTaskReports(HadoopSliceStats.Kind kind) throws IOException, InterruptedException {
    Job job = HadoopStepStats.getJob(getJobStatusClient());

    if (job == null)
        return new TaskReport[0];

    switch (kind) {
    case MAPPER:
        return job.getTaskReports(TaskType.MAP);
    case REDUCER:
        return job.getTaskReports(TaskType.REDUCE);
    case SETUP:
        return job.getTaskReports(TaskType.JOB_SETUP);
    case CLEANUP:
        return job.getTaskReports(TaskType.JOB_CLEANUP);
    default:
        return new TaskReport[0];
    }
}
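As a companion to the snippet above, a minimal sketch of consuming the reports returned for TaskType.REDUCE, assuming only a submitted Job handle (class and method names are illustrative):

import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskReport;
import org.apache.hadoop.mapreduce.TaskType;

public class ReduceProgressSketch {
    // Illustrative helper: print the progress of every reduce task of a job.
    static void printReduceProgress(Job job) throws IOException, InterruptedException {
        for (TaskReport report : job.getTaskReports(TaskType.REDUCE)) {
            System.out.printf("%s: %.0f%%%n", report.getTaskID(), report.getProgress() * 100);
        }
    }
}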
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceMetricsWriter.java
License:Apache License
private void reportMapredStats(Counters jobCounters) throws IOException, InterruptedException {
    JobStatus jobStatus = jobConf.getStatus();

    // map stats
    float mapProgress = jobStatus.getMapProgress();
    int runningMappers = 0;
    int runningReducers = 0;
    for (TaskReport tr : jobConf.getTaskReports(TaskType.MAP)) {
        reportMapTaskMetrics(tr);
        runningMappers += tr.getRunningTaskAttemptIds().size();
    }
    for (TaskReport tr : jobConf.getTaskReports(TaskType.REDUCE)) {
        reportReduceTaskMetrics(tr);
        runningReducers += tr.getRunningTaskAttemptIds().size();
    }
    int memoryPerMapper = jobConf.getConfiguration().getInt(Job.MAP_MEMORY_MB, Job.DEFAULT_MAP_MEMORY_MB);
    int memoryPerReducer = jobConf.getConfiguration().getInt(Job.REDUCE_MEMORY_MB, Job.DEFAULT_REDUCE_MEMORY_MB);

    long mapInputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_INPUT_RECORDS);
    long mapOutputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_RECORDS);
    long mapOutputBytes = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_BYTES);

    mapperMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (mapProgress * 100));
    mapperMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, mapInputRecords);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, mapOutputRecords);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_BYTES, mapOutputBytes);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningMappers);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningMappers * memoryPerMapper);

    LOG.trace("Reporting mapper stats: (completion, containers, memory) = ({}, {}, {})",
            (int) (mapProgress * 100), runningMappers, runningMappers * memoryPerMapper);

    // reduce stats
    float reduceProgress = jobStatus.getReduceProgress();
    long reduceInputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_INPUT_RECORDS);
    long reduceOutputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_OUTPUT_RECORDS);

    reducerMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (reduceProgress * 100));
    reducerMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, reduceInputRecords);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, reduceOutputRecords);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningReducers);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningReducers * memoryPerReducer);

    LOG.trace("Reporting reducer stats: (completion, containers, memory) = ({}, {}, {})",
            (int) (reduceProgress * 100), runningReducers, runningReducers * memoryPerReducer);
}
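The getTaskCounter helper is not part of this excerpt; a plausible reconstruction (hypothetical, assuming it simply resolves a framework TaskCounter against the job's aggregate Counters) would be:

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.TaskCounter;

public final class CounterHelperSketch {
    // Hypothetical stand-in for the getTaskCounter helper used above:
    // look up a framework counter and return its aggregate value.
    static long getTaskCounter(Counters counters, TaskCounter taskCounter) {
        Counter counter = counters.findCounter(taskCounter);
        return counter == null ? 0L : counter.getValue();
    }
}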
From source file:com.asakusafw.runtime.compatibility.hadoop2.JobCompatibilityHadoop2.java
License:Apache License
@Override
public TaskID newReduceTaskId(JobID jobId, int id) {
    if (jobId == null) {
        throw new IllegalArgumentException("jobId must not be null"); //$NON-NLS-1$
    }
    if (TASK_ID_MR2 != null) {
        TaskID result = newTaskIdMr2(jobId, TaskType.REDUCE, id);
        if (result != null) {
            return result;
        }
    }
    return newTaskIdMr1(jobId, false, id);
}
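The reflection indirection above bridges MR1 and MR2 classpaths. On a pure Hadoop 2 classpath it is unnecessary, since TaskID exposes a public constructor taking a TaskType directly; a minimal sketch (class name and identifiers are illustrative):

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class ReduceTaskIdSketch {
    public static void main(String[] args) {
        JobID jobId = new JobID("jt", 1);                          // illustrative identifier
        TaskID reduceTask = new TaskID(jobId, TaskType.REDUCE, 0); // first reduce task
        System.out.println(reduceTask);                            // e.g. task_jt_0001_r_000000
    }
}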
From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java
License:Open Source License
/**
 * Generate random data, compress it, index and md5 hash the data.
 * Then read it all back and md5 that too, to verify that it all went ok.
 *
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
        throws IOException, NoSuchAlgorithmException, InterruptedException {
    if (!GPLNativeCodeLoader.isNativeCodeLoaded()) {
        LOG.warn("Cannot run this test without the native lzo libraries");
        return;
    }

    Configuration conf = new Configuration();
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    // reducing block size to force a split of the tiny file
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir, true);
    localFs.mkdirs(outputDir);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
        Path lzoFile = new Path(outputDir, lzoFileName);
        LzoTextInputFormat.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir);

    List<InputSplit> is = inputFormat.getSplits(job);
    // verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
        assertEquals(3, is.size());
    } else {
        assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
        RecordReader<LongWritable, Text> rr = inputFormat.createRecordReader(inputSplit, attemptContext);
        rr.initialize(inputSplit, attemptContext);

        while (rr.nextKeyValue()) {
            Text value = rr.getCurrentValue();
            md5.update(value.getBytes(), 0, value.getLength());
        }

        rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5.digest()));
}
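Note the five-argument TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2) constructor used here: it builds the job identifier, task id, and attempt number in one call, and is the TaskType-based replacement for the older constructor that took a boolean isMap flag, which is deprecated in Hadoop 2. The Netflix examples below show the equivalent two-step construction via an explicit TaskID.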
From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormatTest.java
License:Apache License
private void mockTaskAttemptContext(String indexType) {
    TaskAttemptID fakeTaskId = new TaskAttemptID(new TaskID("foo_task_" + indexType, 123, TaskType.REDUCE, 2), 2);
    when(fakeTaskAttemptContext.getTaskAttemptID()).thenReturn(fakeTaskId);
    when(fakeTaskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
}
From source file:com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java
License:Apache License
private static Set<String> runTasks(JobContext job, int numTasks, int numFiles) throws IOException {
    Set<String> uploads = Sets.newHashSet();

    for (int taskId = 0; taskId < numTasks; taskId += 1) {
        TaskAttemptID attemptID = new TaskAttemptID(new TaskID(JOB_ID, TaskType.REDUCE, taskId),
                (taskId * 37) % numTasks);
        TaskAttemptContext attempt = new TaskAttemptContextImpl(new Configuration(job.getConfiguration()),
                attemptID);
        MockedS3Committer taskCommitter = new MockedS3Committer(S3_OUTPUT_PATH, attempt);
        commitTask(taskCommitter, attempt, numFiles);
        uploads.addAll(taskCommitter.results.getUploads());
    }

    return uploads;
}
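This test derives attempt ids from a TaskID seeded with TaskType.REDUCE. A standalone sketch of that two-step construction, with all identifiers illustrative, that also shows the resulting string form:

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class ReduceAttemptIdSketch {
    public static void main(String[] args) {
        JobID jobId = new JobID("jt", 42);                      // illustrative identifier
        TaskID taskId = new TaskID(jobId, TaskType.REDUCE, 3);  // fourth reduce task
        TaskAttemptID attemptId = new TaskAttemptID(taskId, 0); // first attempt
        System.out.println(attemptId);                          // e.g. attempt_jt_0042_r_000003_0
    }
}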
From source file:com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java
License:Apache License
private static Path writeOutputFile(TaskAttemptID id, Path dest, String content, long copies) throws IOException {
    String fileName = ((id.getTaskType() == TaskType.REDUCE) ? "r_" : "m_")
            + id.getTaskID().getId() + "_" + id.getId() + "_" + UUID.randomUUID().toString();
    Path outPath = new Path(dest, fileName);
    FileSystem fs = outPath.getFileSystem(getConfiguration());

    try (OutputStream out = fs.create(outPath)) {
        byte[] bytes = content.getBytes(StandardCharsets.UTF_8);
        for (int i = 0; i < copies; i += 1) {
            out.write(bytes);
        }
    }

    return outPath;
}
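The "r_"/"m_" prefix selected from id.getTaskType() mirrors the framework's own id strings, where reduce task attempts render with an r segment (e.g. attempt_jt_0042_r_000003_0) and map attempts with an m.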
From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusChecker.java
License:Apache License
@Override
public void run() {
    // 0. set thread name
    setThreadName();
    if (isInWhitelist(appReport)) {
        LOG.warn("Skipping whitelisted app " + appId);
        return;
    }
    LOG.debug("Running " + Thread.currentThread().getName());
    try {
        try {
            init();
        } catch (Exception e) {
            LOG.error("Skipping app " + appId + " due to initialization error: " + e.getMessage());
            return;
        }
        if (!checkApp())
            return;
        loadClientService();
        checkTasks(TaskType.MAP);
        checkTasks(TaskType.REDUCE);
        clientCache.stopClient(jobId);
    } catch (YarnException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } finally {
        appCheckerProgress.finished();
    }
}
From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java
License:Apache License
@Test
public void testReduceTasks() throws Exception {
    killCounter = 0;
    final String pName = HadoopJobMonitorConfiguration.REDUCE_MAX_RUNTIME_MIN;
    final boolean passCheck = true, killed = true, dryRun = true, enforce = true;
    testTask(TaskType.REDUCE, pName, 5, 10, enforce, !dryRun, TIPStatus.RUNNING, passCheck, !killed);
    testTask(TaskType.REDUCE, pName, 5, 10, 0.01f, enforce, !dryRun, TIPStatus.RUNNING, passCheck, !killed);
    testTask(TaskType.REDUCE, pName, 15, 10, enforce, !dryRun, TIPStatus.FAILED, passCheck, !killed);
    testTask(TaskType.REDUCE, pName, 15, 10, enforce, !dryRun, TIPStatus.RUNNING, !passCheck, killed);
    testTask(TaskType.REDUCE, pName, 15, 10, !enforce, !dryRun, TIPStatus.RUNNING, !passCheck, !killed);
    testTask(TaskType.REDUCE, pName, 15, 10, !enforce, dryRun, TIPStatus.RUNNING, !passCheck, !killed);
    testTask(TaskType.REDUCE, pName, 15, 10, enforce, dryRun, TIPStatus.RUNNING, !passCheck, !killed);
}
From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java
License:Apache License
@Test
public void testReduceProgress() throws Exception {
    testProgress(TaskType.REDUCE, HadoopJobMonitorConfiguration.REDUCE_MAX_RUNTIME_MIN);
}