Usage examples for org.apache.hadoop.mapreduce.TaskCounter.MAP_INPUT_RECORDS

The snippets below show how real projects read the MAP_INPUT_RECORDS task counter from a MapReduce job.
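All of the examples share one basic pattern: obtain the job's Counters and look the counter up by its TaskCounter enum constant. Here is a minimal standalone sketch of that pattern; the class and method names are placeholders for illustration, not taken from any of the source files below.

import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

// Illustrative sketch only; class and method names are placeholders.
public class MapInputRecordsExample {

    /** Returns the total number of records read by all map tasks of a job. */
    public static long getMapInputRecords(Job job) throws IOException, InterruptedException {
        // findCounter(Enum<?>) resolves the counter by its enum group (TaskCounter) and name.
        return job.getCounters().findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
    }
}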
From source file:co.cask.cdap.app.mapreduce.LocalMRJobInfoFetcher.java
License:Apache License
/**
 * @param runId for which information will be returned.
 * @return a {@link MRJobInfo} containing information about a particular MapReduce program run.
 */
public MRJobInfo getMRJobInfo(Id.Run runId) {
    Preconditions.checkArgument(ProgramType.MAPREDUCE.equals(runId.getProgram().getType()));

    // baseTags has tag keys: ns.app.mr.runid
    Map<String, String> baseTags = Maps.newHashMap();
    baseTags.put(Constants.Metrics.Tag.NAMESPACE, runId.getNamespace().getId());
    baseTags.put(Constants.Metrics.Tag.APP, runId.getProgram().getApplicationId());
    baseTags.put(Constants.Metrics.Tag.MAPREDUCE, runId.getProgram().getId());
    baseTags.put(Constants.Metrics.Tag.RUN_ID, runId.getId());

    Map<String, String> mapTags = Maps.newHashMap(baseTags);
    mapTags.put(Constants.Metrics.Tag.MR_TASK_TYPE, MapReduceMetrics.TaskType.Mapper.getId());

    Map<String, String> reduceTags = Maps.newHashMap(baseTags);
    reduceTags.put(Constants.Metrics.Tag.MR_TASK_TYPE, MapReduceMetrics.TaskType.Reducer.getId());

    // map from RunId -> (CounterName -> CounterValue)
    Table<String, String, Long> mapTaskMetrics = HashBasedTable.create();
    Table<String, String, Long> reduceTaskMetrics = HashBasedTable.create();

    // Populate mapTaskMetrics and reduceTaskMetrics via MetricStore. Used to construct MRTaskInfo below.
    Map<String, String> metricNamesToCounters = Maps.newHashMap();
    metricNamesToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_INPUT_RECORDS),
                              TaskCounter.MAP_INPUT_RECORDS.name());
    metricNamesToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_OUTPUT_RECORDS),
                              TaskCounter.MAP_OUTPUT_RECORDS.name());
    metricNamesToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_BYTES),
                              TaskCounter.MAP_OUTPUT_BYTES.name());
    metricNamesToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_COMPLETION),
                              MapReduceMetrics.METRIC_TASK_COMPLETION);

    // get metrics grouped by instance-id for the map tasks
    queryGroupedAggregates(mapTags, mapTaskMetrics, metricNamesToCounters);

    Map<String, Long> mapProgress = Maps.newHashMap();
    if (mapTaskMetrics.columnMap().containsKey(MapReduceMetrics.METRIC_TASK_COMPLETION)) {
        mapProgress = Maps.newHashMap(mapTaskMetrics.columnMap().remove(MapReduceMetrics.METRIC_TASK_COMPLETION));
    }

    Map<String, String> reduceMetricsToCounters = Maps.newHashMap();
    reduceMetricsToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_INPUT_RECORDS),
                                TaskCounter.REDUCE_INPUT_RECORDS.name());
    reduceMetricsToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_OUTPUT_RECORDS),
                                TaskCounter.REDUCE_OUTPUT_RECORDS.name());
    reduceMetricsToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_COMPLETION),
                                MapReduceMetrics.METRIC_TASK_COMPLETION);

    // get metrics grouped by instance-id for the reduce tasks
    queryGroupedAggregates(reduceTags, reduceTaskMetrics, reduceMetricsToCounters);

    Map<String, Long> reduceProgress = Maps.newHashMap();
    if (reduceTaskMetrics.columnMap().containsKey(MapReduceMetrics.METRIC_TASK_COMPLETION)) {
        reduceProgress = Maps.newHashMap(reduceTaskMetrics.columnMap().remove(MapReduceMetrics.METRIC_TASK_COMPLETION));
    }

    // Construct MRTaskInfos from the information we can get from the metrics system.
    List<MRTaskInfo> mapTaskInfos = Lists.newArrayList();
    for (Map.Entry<String, Map<String, Long>> taskEntry : mapTaskMetrics.rowMap().entrySet()) {
        String mapTaskId = taskEntry.getKey();
        mapTaskInfos.add(new MRTaskInfo(mapTaskId, null, null, null,
                                        mapProgress.get(mapTaskId) / 100.0F, taskEntry.getValue()));
    }

    List<MRTaskInfo> reduceTaskInfos = Lists.newArrayList();
    for (Map.Entry<String, Map<String, Long>> taskEntry : reduceTaskMetrics.rowMap().entrySet()) {
        String reduceTaskId = taskEntry.getKey();
        reduceTaskInfos.add(new MRTaskInfo(reduceTaskId, null, null, null,
                                           reduceProgress.get(reduceTaskId) / 100.0F, taskEntry.getValue()));
    }

    return getJobCounters(mapTags, reduceTags, mapTaskInfos, reduceTaskInfos);
}
From source file:co.cask.cdap.app.mapreduce.LocalMRJobInfoFetcher.java
License:Apache License
private MRJobInfo getJobCounters(Map<String, String> mapTags, Map<String, String> reduceTags,
                                 List<MRTaskInfo> mapTaskInfos, List<MRTaskInfo> reduceTaskInfos) {
    HashMap<String, Long> metrics = Maps.newHashMap();

    Map<String, String> mapMetricsToCounters = ImmutableMap.of(
        prependSystem(MapReduceMetrics.METRIC_INPUT_RECORDS), TaskCounter.MAP_INPUT_RECORDS.name(),
        prependSystem(MapReduceMetrics.METRIC_OUTPUT_RECORDS), TaskCounter.MAP_OUTPUT_RECORDS.name(),
        prependSystem(MapReduceMetrics.METRIC_BYTES), TaskCounter.MAP_OUTPUT_BYTES.name(),
        prependSystem(MapReduceMetrics.METRIC_COMPLETION), MapReduceMetrics.METRIC_COMPLETION);
    getAggregates(mapTags, mapMetricsToCounters, metrics);
    float mapProgress = metrics.remove(MapReduceMetrics.METRIC_COMPLETION) / 100.0F;

    Map<String, String> reduceMetricsToCounters = ImmutableMap.of(
        prependSystem(MapReduceMetrics.METRIC_INPUT_RECORDS), TaskCounter.REDUCE_INPUT_RECORDS.name(),
        prependSystem(MapReduceMetrics.METRIC_OUTPUT_RECORDS), TaskCounter.REDUCE_OUTPUT_RECORDS.name(),
        prependSystem(MapReduceMetrics.METRIC_COMPLETION), MapReduceMetrics.METRIC_COMPLETION);
    getAggregates(reduceTags, reduceMetricsToCounters, metrics);
    float reduceProgress = metrics.remove(MapReduceMetrics.METRIC_COMPLETION) / 100.0F;

    return new MRJobInfo(mapProgress, reduceProgress, metrics, mapTaskInfos, reduceTaskInfos, false);
}
From source file:co.cask.cdap.app.mapreduce.LocalMRJobInfoFetcherTest.java
License:Apache License
@Test
public void testGetMRJobInfo() throws Exception {
    Id.Program programId = Id.Program.from("fooNamespace", "testApp", ProgramType.MAPREDUCE, "fooMapReduce");
    Id.Run runId = new Id.Run(programId, "run10878");

    Map<String, String> runContext = ImmutableMap.of(
        Constants.Metrics.Tag.NAMESPACE, programId.getNamespaceId(),
        Constants.Metrics.Tag.APP, programId.getApplicationId(),
        Constants.Metrics.Tag.MAPREDUCE, programId.getId(),
        Constants.Metrics.Tag.RUN_ID, runId.getId());

    Map<String, String> mapTypeContext = addToContext(runContext, Constants.Metrics.Tag.MR_TASK_TYPE,
                                                      MapReduceMetrics.TaskType.Mapper.getId());
    Map<String, String> reduceTypeContext = addToContext(runContext, Constants.Metrics.Tag.MR_TASK_TYPE,
                                                         MapReduceMetrics.TaskType.Reducer.getId());

    String mapTask1Name = "task_m_01";
    Map<String, String> mapTask1Context = addToContext(mapTypeContext, Constants.Metrics.Tag.INSTANCE_ID, mapTask1Name);

    String mapTask2Name = "task_m_02";
    Map<String, String> mapTask2Context = addToContext(mapTypeContext, Constants.Metrics.Tag.INSTANCE_ID, mapTask2Name);

    String reduceTaskName = "task_r_01";
    Map<String, String> reduceTaskContext = addToContext(reduceTypeContext, Constants.Metrics.Tag.INSTANCE_ID, reduceTaskName);

    // Imitate a MapReduce job running (gauge mapper and reducer metrics)
    long measureTime = System.currentTimeMillis() / 1000;
    gauge(mapTypeContext, MapReduceMetrics.METRIC_COMPLETION, measureTime, 76L);
    gauge(reduceTypeContext, MapReduceMetrics.METRIC_COMPLETION, measureTime, 52L);

    gauge(mapTask1Context, MapReduceMetrics.METRIC_TASK_COMPLETION, measureTime, 100L);
    gauge(mapTask1Context, MapReduceMetrics.METRIC_TASK_INPUT_RECORDS, measureTime, 32L);
    gauge(mapTask1Context, MapReduceMetrics.METRIC_TASK_OUTPUT_RECORDS, measureTime, 320L);

    gauge(mapTask2Context, MapReduceMetrics.METRIC_TASK_COMPLETION, measureTime, 12L);
    gauge(mapTask2Context, MapReduceMetrics.METRIC_TASK_INPUT_RECORDS, measureTime, 6L);
    gauge(mapTask2Context, MapReduceMetrics.METRIC_TASK_OUTPUT_RECORDS, measureTime, 60L);

    // gauge job-level counters for mappers
    gauge(mapTypeContext, MapReduceMetrics.METRIC_INPUT_RECORDS, measureTime, 38L);
    gauge(mapTypeContext, MapReduceMetrics.METRIC_OUTPUT_RECORDS, measureTime, 380L);

    gauge(reduceTaskContext, MapReduceMetrics.METRIC_TASK_COMPLETION, measureTime, 76L);
    gauge(reduceTaskContext, MapReduceMetrics.METRIC_TASK_INPUT_RECORDS, measureTime, 320L);
    gauge(reduceTaskContext, MapReduceMetrics.METRIC_TASK_OUTPUT_RECORDS, measureTime, 1L);

    // gauge job-level counters for reducers
    gauge(reduceTypeContext, MapReduceMetrics.METRIC_INPUT_RECORDS, measureTime, 320L);
    gauge(reduceTypeContext, MapReduceMetrics.METRIC_OUTPUT_RECORDS, measureTime, 1L);

    LocalMRJobInfoFetcher localMRJobInfoFetcher = injector.getInstance(LocalMRJobInfoFetcher.class);
    MRJobInfo mrJobInfo = localMRJobInfoFetcher.getMRJobInfo(runId);

    // Incomplete because MapReduceMetricsInfo does not provide task-level state and start/end times.
    Assert.assertFalse(mrJobInfo.isComplete());

    // Check job-level counters
    Map<String, Long> jobCounters = mrJobInfo.getCounters();
    Assert.assertEquals((Long) 38L, jobCounters.get(TaskCounter.MAP_INPUT_RECORDS.name()));
    Assert.assertEquals((Long) 380L, jobCounters.get(TaskCounter.MAP_OUTPUT_RECORDS.name()));
    Assert.assertEquals((Long) 320L, jobCounters.get(TaskCounter.REDUCE_INPUT_RECORDS.name()));
    Assert.assertEquals((Long) 1L, jobCounters.get(TaskCounter.REDUCE_OUTPUT_RECORDS.name()));

    // Ensure all tasks show up
    List<MRTaskInfo> mapTasks = mrJobInfo.getMapTasks();
    List<MRTaskInfo> reduceTasks = mrJobInfo.getReduceTasks();
    Assert.assertEquals(2, mapTasks.size());
    Assert.assertEquals(1, reduceTasks.size());

    MRTaskInfo mapTask1 = findByTaskId(mapTasks, mapTask1Name);
    MRTaskInfo mapTask2 = findByTaskId(mapTasks, mapTask2Name);
    MRTaskInfo reduceTask = findByTaskId(reduceTasks, reduceTaskName);

    // Check task-level counters
    Map<String, Long> mapTask1Counters = mapTask1.getCounters();
    Assert.assertEquals((Long) 32L, mapTask1Counters.get(TaskCounter.MAP_INPUT_RECORDS.name()));
    Assert.assertEquals((Long) 320L, mapTask1Counters.get(TaskCounter.MAP_OUTPUT_RECORDS.name()));

    Map<String, Long> mapTask2Counters = mapTask2.getCounters();
    Assert.assertEquals((Long) 6L, mapTask2Counters.get(TaskCounter.MAP_INPUT_RECORDS.name()));
    Assert.assertEquals((Long) 60L, mapTask2Counters.get(TaskCounter.MAP_OUTPUT_RECORDS.name()));

    Map<String, Long> reduceTaskCounters = reduceTask.getCounters();
    Assert.assertEquals((Long) 320L, reduceTaskCounters.get(TaskCounter.REDUCE_INPUT_RECORDS.name()));
    Assert.assertEquals((Long) 1L, reduceTaskCounters.get(TaskCounter.REDUCE_OUTPUT_RECORDS.name()));

    // Check progress
    float permittedProgressDelta = 0.01F;
    Assert.assertEquals(0.76F, mrJobInfo.getMapProgress(), permittedProgressDelta);
    Assert.assertEquals(0.52F, mrJobInfo.getReduceProgress(), permittedProgressDelta);
    Assert.assertEquals(1.0F, mapTask1.getProgress(), permittedProgressDelta);
    Assert.assertEquals(0.12F, mapTask2.getProgress(), permittedProgressDelta);
    Assert.assertEquals(0.76F, reduceTask.getProgress(), permittedProgressDelta);
}
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceMetricsWriter.java
License:Apache License
private void reportMapredStats(Counters jobCounters) throws IOException, InterruptedException {
    JobStatus jobStatus = jobConf.getStatus();

    // map stats
    float mapProgress = jobStatus.getMapProgress();

    int runningMappers = 0;
    int runningReducers = 0;
    for (TaskReport tr : jobConf.getTaskReports(TaskType.MAP)) {
        reportMapTaskMetrics(tr);
        runningMappers += tr.getRunningTaskAttemptIds().size();
    }
    for (TaskReport tr : jobConf.getTaskReports(TaskType.REDUCE)) {
        reportReduceTaskMetrics(tr);
        runningReducers += tr.getRunningTaskAttemptIds().size();
    }
    int memoryPerMapper = jobConf.getConfiguration().getInt(Job.MAP_MEMORY_MB, Job.DEFAULT_MAP_MEMORY_MB);
    int memoryPerReducer = jobConf.getConfiguration().getInt(Job.REDUCE_MEMORY_MB, Job.DEFAULT_REDUCE_MEMORY_MB);

    long mapInputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_INPUT_RECORDS);
    long mapOutputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_RECORDS);
    long mapOutputBytes = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_BYTES);

    mapperMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (mapProgress * 100));
    mapperMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, mapInputRecords);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, mapOutputRecords);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_BYTES, mapOutputBytes);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningMappers);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningMappers * memoryPerMapper);

    LOG.trace("Reporting mapper stats: (completion, containers, memory) = ({}, {}, {})",
              (int) (mapProgress * 100), runningMappers, runningMappers * memoryPerMapper);

    // reduce stats
    float reduceProgress = jobStatus.getReduceProgress();
    long reduceInputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_INPUT_RECORDS);
    long reduceOutputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_OUTPUT_RECORDS);

    reducerMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (reduceProgress * 100));
    reducerMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, reduceInputRecords);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, reduceOutputRecords);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningReducers);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningReducers * memoryPerReducer);

    LOG.trace("Reporting reducer stats: (completion, containers, memory) = ({}, {}, {})",
              (int) (reduceProgress * 100), runningReducers, runningReducers * memoryPerReducer);
}
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceMetricsWriter.java
License:Apache License
private void reportMapTaskMetrics(TaskReport taskReport) {
    Counters counters = taskReport.getTaskCounters();
    MetricsContext metricsContext = mapTaskMetricsCollectors.getUnchecked(taskReport.getTaskId());
    metricsContext.gauge(MapReduceMetrics.METRIC_TASK_INPUT_RECORDS,
                         getTaskCounter(counters, TaskCounter.MAP_INPUT_RECORDS));
    metricsContext.gauge(MapReduceMetrics.METRIC_TASK_OUTPUT_RECORDS,
                         getTaskCounter(counters, TaskCounter.MAP_OUTPUT_RECORDS));
    metricsContext.gauge(MapReduceMetrics.METRIC_TASK_BYTES,
                         getTaskCounter(counters, TaskCounter.MAP_OUTPUT_BYTES));
    metricsContext.gauge(MapReduceMetrics.METRIC_TASK_COMPLETION, (long) (taskReport.getProgress() * 100));
}
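Both MapReduceMetricsWriter snippets above delegate to a private getTaskCounter helper that the excerpt does not show. A minimal sketch of what such a helper could look like, assuming the enum-based findCounter overload on org.apache.hadoop.mapreduce.Counters; the helper body here is an assumption for illustration, not the CDAP original:

// Hypothetical helper, sketched for illustration; the CDAP implementation is not shown in this excerpt.
private long getTaskCounter(Counters counters, TaskCounter taskCounter) {
    // Counters.findCounter(Enum<?>) looks the counter up by its enum group and name,
    // creating a zero-valued counter if it does not exist yet.
    return counters.findCounter(taskCounter).getValue();
}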
From source file:co.cask.cdap.internal.app.services.http.handlers.WorkflowStatsSLAHttpHandlerTest.java
License:Apache License
@Test
public void testCompare() throws Exception {
    deploy(WorkflowApp.class);
    String workflowName = "FunWorkflow";
    String mapreduceName = "ClassicWordCount";
    String sparkName = "SparkWorkflowTest";

    Id.Program workflowProgram = Id.Workflow.from(Id.Namespace.DEFAULT, "WorkflowApp", ProgramType.WORKFLOW, workflowName);
    Id.Program mapreduceProgram = Id.Program.from(Id.Namespace.DEFAULT, "WorkflowApp", ProgramType.MAPREDUCE, mapreduceName);
    Id.Program sparkProgram = Id.Program.from(Id.Namespace.DEFAULT, "WorkflowApp", ProgramType.SPARK, sparkName);

    List<RunId> workflowRunIdList = setupRuns(workflowProgram, mapreduceProgram, sparkProgram, store, 2);
    RunId workflowRun1 = workflowRunIdList.get(0);
    RunId workflowRun2 = workflowRunIdList.get(1);

    String request = String.format("%s/namespaces/%s/apps/%s/workflows/%s/runs/%s/compare?other-run-id=%s",
                                   Constants.Gateway.API_VERSION_3, Id.Namespace.DEFAULT.getId(),
                                   WorkflowApp.class.getSimpleName(), workflowProgram.getId(),
                                   workflowRun1.toString(), workflowRun2.toString());

    HttpResponse response = doGet(request);
    Collection<WorkflowStatsComparison.ProgramNodes> workflowStatistics = readResponse(response,
        new TypeToken<Collection<WorkflowStatsComparison.ProgramNodes>>() { }.getType());

    Assert.assertNotNull(workflowStatistics.iterator().next());
    Assert.assertEquals(2, workflowStatistics.size());

    for (WorkflowStatsComparison.ProgramNodes node : workflowStatistics) {
        if (node.getProgramType() == ProgramType.MAPREDUCE) {
            Assert.assertEquals(38L, (long) node.getWorkflowProgramDetailsList().get(0).getMetrics()
                    .get(TaskCounter.MAP_INPUT_RECORDS.name()));
        }
    }
}
From source file:com.cloudera.sqoop.shims.Apache22HadoopShim.java
License:Apache License
@Override
public long getNumMapInputRecords(Job job) throws IOException, InterruptedException {
    return job.getCounters().findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
}
From source file:com.kylinolap.job.cmd.JavaHadoopCmdOutput.java
License:Apache License
private void updateJobCounter() {
    try {
        this.output.append(job.getCounters().toString()).append("\n");
        log.debug(job.getCounters().toString());

        JobDAO jobDAO = JobDAO.getInstance(config);
        JobInstance jobInstance = jobDAO.getJob(jobInstanceID);
        JobStep jobStep = jobInstance.getSteps().get(jobStepID);

        long mapInputRecords = job.getCounters().findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
        jobStep.putInfo(JobInstance.SOURCE_RECORDS_COUNT, String.valueOf(mapInputRecords));

        long hdfsBytesWritten = job.getCounters().findCounter("FileSystemCounters", "HDFS_BYTES_WRITTEN").getValue();
        jobStep.putInfo(JobInstance.HDFS_BYTES_WRITTEN, String.valueOf(hdfsBytesWritten));

        jobDAO.updateJobInstance(jobInstance);
    } catch (Exception e) {
        log.error(e.getLocalizedMessage(), e);
        output.append(e.getLocalizedMessage());
    }
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 1) {
        JobBuilder.printUsage(this, "<job ID>");
        return -1;
    }
    String jobID = args[0];

    Cluster cluster = new Cluster(getConf());
    Job job = cluster.getJob(JobID.forName(jobID));
    if (job == null) {
        System.err.printf("No job with ID %s found.\n", jobID);
        return -1;
    }
    if (!job.isComplete()) {
        System.err.printf("Job %s is not complete.\n", jobID);
        return -1;
    }

    Counters counters = job.getCounters();
    long missing = counters.findCounter(MaxTemperatureWithCounters.Temperature.MISSING).getValue();
    long total = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();

    System.out.printf("Records with missing temperature fields: %.2f%%\n", 100.0 * missing / total);
    return 0;
}
From source file:org.apache.kylin.engine.mr.common.HadoopCmdOutput.java
License:Apache License
public void updateJobCounter() {
    try {
        Counters counters = job.getCounters();
        if (counters == null) {
            String errorMsg = "no counters for job " + getMrJobId();
            logger.warn(errorMsg);
            output.append(errorMsg);
            return;
        }
        this.output.append(counters.toString()).append("\n");
        logger.debug(counters.toString());

        String bytesWrittenCounterName = "HDFS_BYTES_WRITTEN";
        String fsScheme = FileSystem.get(job.getConfiguration()).getScheme();
        if (("wasb").equalsIgnoreCase(fsScheme)) {
            // for Azure blob store
            bytesWrittenCounterName = "WASB_BYTES_WRITTEN";
        }

        mapInputRecords = String.valueOf(counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue());
        hdfsBytesWritten = String.valueOf(counters.findCounter("FileSystemCounters", bytesWrittenCounterName).getValue());
        rawInputBytesRead = String.valueOf(counters.findCounter(RawDataCounter.BYTES).getValue());
    } catch (Exception e) {
        logger.error(e.getLocalizedMessage(), e);
        output.append(e.getLocalizedMessage());
    }
}