List of usage examples for org.apache.hadoop.mapreduce TaskType MAP
TaskType MAP
To view the source code for org.apache.hadoop.mapreduce TaskType MAP, click the Source Link.
From source file:cascading.stats.hadoop.HadoopNodeStats.java
License:Open Source License
/** * Retrieves the TaskReports via the mapreduce API. * * @param kind The kind of TaskReport to retrieve. * @return An array of TaskReports, but never <code>nul</code>. * @throws IOException/* w w w. j av a 2 s.co m*/ */ private TaskReport[] retrieveTaskReports(HadoopSliceStats.Kind kind) throws IOException, InterruptedException { Job job = HadoopStepStats.getJob(getJobStatusClient()); if (job == null) return new TaskReport[0]; switch (kind) { case MAPPER: return job.getTaskReports(TaskType.MAP); case REDUCER: return job.getTaskReports(TaskType.REDUCE); case SETUP: return job.getTaskReports(TaskType.JOB_SETUP); case CLEANUP: return job.getTaskReports(TaskType.JOB_CLEANUP); default: return new TaskReport[0]; } }
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceMetricsWriter.java
License:Apache License
private void reportMapredStats(Counters jobCounters) throws IOException, InterruptedException { JobStatus jobStatus = jobConf.getStatus(); // map stats//from w w w .j a va 2 s . c o m float mapProgress = jobStatus.getMapProgress(); int runningMappers = 0; int runningReducers = 0; for (TaskReport tr : jobConf.getTaskReports(TaskType.MAP)) { reportMapTaskMetrics(tr); runningMappers += tr.getRunningTaskAttemptIds().size(); } for (TaskReport tr : jobConf.getTaskReports(TaskType.REDUCE)) { reportReduceTaskMetrics(tr); runningReducers += tr.getRunningTaskAttemptIds().size(); } int memoryPerMapper = jobConf.getConfiguration().getInt(Job.MAP_MEMORY_MB, Job.DEFAULT_MAP_MEMORY_MB); int memoryPerReducer = jobConf.getConfiguration().getInt(Job.REDUCE_MEMORY_MB, Job.DEFAULT_REDUCE_MEMORY_MB); long mapInputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_INPUT_RECORDS); long mapOutputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_RECORDS); long mapOutputBytes = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_BYTES); mapperMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (mapProgress * 100)); mapperMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, mapInputRecords); mapperMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, mapOutputRecords); mapperMetrics.gauge(MapReduceMetrics.METRIC_BYTES, mapOutputBytes); mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningMappers); mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningMappers * memoryPerMapper); LOG.trace("Reporting mapper stats: (completion, containers, memory) = ({}, {}, {})", (int) (mapProgress * 100), runningMappers, runningMappers * memoryPerMapper); // reduce stats float reduceProgress = jobStatus.getReduceProgress(); long reduceInputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_INPUT_RECORDS); long reduceOutputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_OUTPUT_RECORDS); reducerMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) 
(reduceProgress * 100)); reducerMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, reduceInputRecords); reducerMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, reduceOutputRecords); reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningReducers); reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningReducers * memoryPerReducer); LOG.trace("Reporting reducer stats: (completion, containers, memory) = ({}, {}, {})", (int) (reduceProgress * 100), runningReducers, runningReducers * memoryPerReducer); }
From source file:com.asakusafw.runtime.compatibility.hadoop2.JobCompatibilityHadoop2.java
License:Apache License
@Override public TaskID newMapTaskId(JobID jobId, int id) { if (jobId == null) { throw new IllegalArgumentException("jobId must not be null"); //$NON-NLS-1$ }/*w ww . j av a2 s . com*/ if (TASK_ID_MR2 != null) { TaskID result = newTaskIdMr2(jobId, TaskType.MAP, id); if (result != null) { return result; } } return newTaskIdMr1(jobId, true, id); }
From source file:com.asakusafw.runtime.compatibility.hadoop2.JobCompatibilityHadoop2.java
License:Apache License
@Override public Counter getTaskOutputRecordCounter(TaskInputOutputContext<?, ?, ?, ?> context) { if (context == null) { throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$ }// www . j av a 2s. c om if (context.getTaskAttemptID().getTaskType() == TaskType.MAP) { return context.getCounter(TaskCounter.MAP_OUTPUT_RECORDS); } else { return context.getCounter(TaskCounter.REDUCE_OUTPUT_RECORDS); } }
From source file:com.asakusafw.testdriver.file.FileDeployer.java
License:Apache License
/**
 * Opens output for the specified {@link OutputFormat}.
 *
 * <p>Records are first written into a freshly created temporary staging directory; when the
 * returned {@link ModelOutput} is closed, the staged content is handed to
 * {@code deploy(destination, temporaryDir)}.
 *
 * @param <V> value type
 * @param definition target model definition
 * @param destination output location
 * @param output format
 * @return the opened {@link ModelOutput}
 * @throws IOException if failed to open the target output
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> ModelOutput<V> openOutput(DataModelDefinition<V> definition, final String destination,
        FileOutputFormat<? super NullWritable, ? super V> output) throws IOException {
    // The documented contract is IllegalArgumentException on null arguments. Plain `assert`
    // statements are disabled unless the JVM runs with -ea, so check explicitly.
    if (definition == null) {
        throw new IllegalArgumentException("definition must not be null");
    }
    if (destination == null) {
        throw new IllegalArgumentException("destination must not be null");
    }
    if (output == null) {
        throw new IllegalArgumentException("output must not be null");
    }
    LOG.debug("Opening {} using {}", destination, output.getClass().getName());

    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(definition.getModelClass());

    // Reserve a unique name as a temp file, then swap the file for a directory of that name.
    final File temporaryDir = File.createTempFile("asakusa", ".tempdir");
    if (!temporaryDir.delete() || !temporaryDir.mkdirs()) {
        throw new IOException("Failed to create temporary directory");
    }
    LOG.debug("Using staging deploy target: {}", temporaryDir);

    URI uri = temporaryDir.toURI();
    FileOutputFormat.setOutputPath(job, new Path(uri));

    // A synthetic map-task attempt is enough to drive the output format outside a real job.
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    FileOutputFormatDriver<V> result = new FileOutputFormatDriver<V>(context, output, NullWritable.get()) {
        @Override
        public void close() throws IOException {
            super.close();
            // Publish the staged files only after the writer has been closed.
            deploy(destination, temporaryDir);
        }
    };
    return result;
}
From source file:com.asakusafw.testdriver.file.FileExporterRetriever.java
License:Apache License
/**
 * Creates a {@link DataModelSource} that reads back what a file exporter wrote.
 *
 * <p>The exporter's path prefix is resolved against the variables derived from the test
 * context and used as the input path; the input format is obtained from
 * {@code getOpposite} for the exporter's output format.
 *
 * @param <V> the data model type
 * @param definition the data model definition
 * @param description the exporter description to read from
 * @param context the current test context
 * @return a source over the exported files
 * @throws IOException if the source could not be opened
 */
@Override
public <V> DataModelSource createSource(DataModelDefinition<V> definition,
        FileExporterDescription description, TestContext context) throws IOException {
    // NOTE(review): the message text looks like mis-encoded non-ASCII; it is a runtime string
    // and is therefore left exactly as found.
    LOG.info("??????: {}", description);

    VariableTable variables = createVariables(context);
    checkType(definition, description);

    Configuration conf = configurations.newInstance();
    Job job = Job.getInstance(conf);
    String inputLocation = variables.parse(description.getPathPrefix(), false);
    FileInputFormat.setInputPaths(job, new Path(inputLocation));

    // Build a synthetic map-task attempt so the input format can be driven without a real job.
    TaskID taskId = new TaskID(new JobID(), TaskType.MAP, 0);
    TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), attemptId);

    FileInputFormat<?, V> format = getOpposite(conf, description.getOutputFormat());
    return new FileInputFormatDriver<>(definition, taskContext, format);
}
From source file:com.linkedin.drelephant.mapreduce.fetchers.MapReduceFSFetcherHadoop2.java
License:Apache License
@Override public MapReduceApplicationData fetchData(AnalyticJob job) throws IOException { DataFiles files = getHistoryFiles(job); String confFile = files.getJobConfPath(); String histFile = files.getJobHistPath(); String appId = job.getAppId(); String jobId = Utils.getJobIdFromApplicationId(appId); MapReduceApplicationData jobData = new MapReduceApplicationData(); jobData.setAppId(appId).setJobId(jobId); // Fetch job config Configuration jobConf = new Configuration(false); jobConf.addResource(_fs.open(new Path(confFile)), confFile); Properties jobConfProperties = new Properties(); for (Map.Entry<String, String> entry : jobConf) { jobConfProperties.put(entry.getKey(), entry.getValue()); }/*from w w w . j a va2 s . co m*/ jobData.setJobConf(jobConfProperties); // Check if job history file is too large and should be throttled if (_fs.getFileStatus(new Path(histFile)).getLen() > _maxLogSizeInMB * FileUtils.ONE_MB) { String errMsg = "The history log of MapReduce application: " + appId + " is over the limit size of " + _maxLogSizeInMB + " MB, the parsing process gets throttled."; logger.warn(errMsg); jobData.setDiagnosticInfo(errMsg); jobData.setSucceeded(false); // set succeeded to false to avoid heuristic analysis return jobData; } // Analyze job history file JobHistoryParser parser = new JobHistoryParser(_fs, histFile); JobHistoryParser.JobInfo jobInfo = parser.parse(); IOException parseException = parser.getParseException(); if (parseException != null) { throw new RuntimeException("Could not parse history file " + histFile, parseException); } jobData.setSubmitTime(jobInfo.getSubmitTime()); jobData.setStartTime(jobInfo.getLaunchTime()); jobData.setFinishTime(jobInfo.getFinishTime()); String state = jobInfo.getJobStatus(); if (state.equals("SUCCEEDED")) { jobData.setSucceeded(true); // Fetch job counter MapReduceCounterData jobCounter = getCounterData(jobInfo.getTotalCounters()); // Fetch task data Map<TaskID, JobHistoryParser.TaskInfo> allTasks = 
jobInfo.getAllTasks(); List<JobHistoryParser.TaskInfo> mapperInfoList = new ArrayList<JobHistoryParser.TaskInfo>(); List<JobHistoryParser.TaskInfo> reducerInfoList = new ArrayList<JobHistoryParser.TaskInfo>(); for (JobHistoryParser.TaskInfo taskInfo : allTasks.values()) { if (taskInfo.getTaskType() == TaskType.MAP) { mapperInfoList.add(taskInfo); } else { reducerInfoList.add(taskInfo); } } if (jobInfo.getTotalMaps() > MAX_SAMPLE_SIZE) { logger.debug(jobId + " total mappers: " + mapperInfoList.size()); } if (jobInfo.getTotalReduces() > MAX_SAMPLE_SIZE) { logger.debug(jobId + " total reducers: " + reducerInfoList.size()); } MapReduceTaskData[] mapperList = getTaskData(jobId, mapperInfoList); MapReduceTaskData[] reducerList = getTaskData(jobId, reducerInfoList); jobData.setCounters(jobCounter).setMapperData(mapperList).setReducerData(reducerList); } else if (state.equals("FAILED")) { jobData.setSucceeded(false); jobData.setDiagnosticInfo(jobInfo.getErrorInfo()); } else { // Should not reach here throw new RuntimeException("Job state not supported. Should be either SUCCEEDED or FAILED"); } return jobData; }
From source file:com.linkedin.drelephant.mapreduce.fetchers.MapReduceFSFetcherHadoop2.java
License:Apache License
/**
 * Derives the timing figures of one task attempt.
 *
 * @param attempInfo the task attempt to inspect
 * @return a five-element array:
 *         {@code [finish - start, shuffleFinish - start, sortFinish - shuffleFinish, start, finish]};
 *         the second and third elements are {@code 0} for map attempts
 */
private long[] getTaskExecTime(JobHistoryParser.TaskAttemptInfo attempInfo) {
    long begin = attempInfo.getStartTime();
    long end = attempInfo.getFinishTime();

    // Shuffle and merge spans only exist for non-map attempts; map attempts report zero.
    long shuffleSpan = 0;
    long mergeSpan = 0;
    if (attempInfo.getTaskType() != TaskType.MAP) {
        long shuffleEnd = attempInfo.getShuffleFinishTime();
        long sortEnd = attempInfo.getSortFinishTime();
        shuffleSpan = shuffleEnd - begin;
        mergeSpan = sortEnd - shuffleEnd;
    }

    return new long[] { end - begin, shuffleSpan, mergeSpan, begin, end };
}
From source file:com.marklogic.contentpump.LocalJobRunner.java
License:Apache License
/**
 * Runs the job locally: computes the input splits, sorts them with
 * {@code SplitLengthComparator}, then processes one map task per split. When a thread pool
 * is present each split is wrapped in a {@code LocalMapTask} and submitted to it; otherwise
 * the splits are processed one by one on the calling thread.
 *
 * <p>If the output specification check fails, the error is logged and the method returns
 * without running anything and without throwing.
 *
 * <p>After all tasks finish, the progress monitor is stopped, every counter held by the
 * reporter is logged, and the total execution time is logged.
 *
 * @param <INKEY> key type produced by the configured input format
 * @param <INVALUE> value type produced by the configured input format
 * @param <OUTKEY> key type accepted by the configured output format
 * @param <OUTVALUE> value type accepted by the configured output format
 * @param <T> input split type used for the sortable split array
 * @throws Exception any failure raised while computing splits, running or committing tasks,
 *         or waiting for pooled tasks (including failures surfaced by {@code Future.get()})
 */
@SuppressWarnings("unchecked")
public <INKEY, INVALUE, OUTKEY, OUTVALUE, T extends org.apache.hadoop.mapreduce.InputSplit> void run()
        throws Exception {
    Configuration conf = job.getConfiguration();
    // Instantiate the job's input format reflectively and ask it for the splits.
    InputFormat<INKEY, INVALUE> inputFormat = (InputFormat<INKEY, INVALUE>) ReflectionUtils
            .newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = inputFormat.getSplits(job);
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);
    // sort the splits into order based on size, so that the biggest
    // goes first
    Arrays.sort(array, new SplitLengthComparator());
    OutputFormat<OUTKEY, OUTVALUE> outputFormat = (OutputFormat<OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(job.getOutputFormatClass(), conf);
    Class<? extends Mapper<?, ?, ?, ?>> mapperClass = job.getMapperClass();
    Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(mapperClass, conf);
    try {
        outputFormat.checkOutputSpecs(job);
    } catch (Exception ex) {
        // A failed output check aborts the run quietly: full stack trace only at debug level,
        // otherwise just the message. Callers get a normal return, not an exception.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error checking output specification: ", ex);
        } else {
            LOG.error("Error checking output specification: ");
            LOG.error(ex.getMessage());
        }
        return;
    }
    // NOTE(review): the configuration is fetched again here, presumably because
    // checkOutputSpecs may have altered the job's configuration - confirm.
    conf = job.getConfiguration();
    // One progress counter per split; each is handed to that split's task or tracking reader.
    progress = new AtomicInteger[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        progress[i] = new AtomicInteger();
    }
    Monitor monitor = new Monitor();
    monitor.start();
    reporter = new ContentPumpReporter();
    // Only futures of MultithreadedMapper tasks are collected here (see below).
    List<Future<Object>> taskList = new ArrayList<Future<Object>>();
    for (int i = 0; i < array.length; i++) {
        InputSplit split = array[i];
        if (pool != null) {
            LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE> task = new LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE>(
                    inputFormat, outputFormat, conf, i, split, reporter, progress[i]);
            availableThreads = assignThreads(i, array.length);
            Class<? extends Mapper<?, ?, ?, ?>> runtimeMapperClass = job.getMapperClass();
            if (availableThreads > 1 && availableThreads != threadsPerSplit) {
                // possible runtime adjustment
                if (runtimeMapperClass != (Class) MultithreadedMapper.class) {
                    runtimeMapperClass = (Class<? extends Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>>) cmd
                            .getRuntimeMapperClass(job, mapperClass, threadsPerSplit, availableThreads);
                }
                // Tell the task only when the class actually differs from the job's mapper.
                if (runtimeMapperClass != mapperClass) {
                    task.setMapperClass(runtimeMapperClass);
                }
                if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                    task.setThreadCount(availableThreads);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Thread Count for Split#" + i + " : " + availableThreads);
                    }
                }
            }
            if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                synchronized (pool) {
                    taskList.add(pool.submit(task));
                    // Blocks the submitting thread until some other thread notifies on `pool`.
                    // The notifier is not visible in this method - presumably the submitted
                    // task does it; confirm in LocalMapTask.
                    pool.wait();
                }
            } else {
                // NOTE(review): this Future is discarded, so a failure of such a task is not
                // surfaced by the f.get() loop below.
                pool.submit(task);
            }
        } else { // single-threaded
            JobID jid = new JobID();
            TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, i);
            TaskAttemptID taskAttemptId = new TaskAttemptID(taskId, 0);
            TaskAttemptContext context = ReflectionUtil.createTaskAttemptContext(conf, taskAttemptId);
            RecordReader<INKEY, INVALUE> reader = inputFormat.createRecordReader(split, context);
            RecordWriter<OUTKEY, OUTVALUE> writer = outputFormat.getRecordWriter(context);
            OutputCommitter committer = outputFormat.getOutputCommitter(context);
            // Wrap the reader together with this split's progress counter.
            TrackingRecordReader trackingReader = new TrackingRecordReader(reader, progress[i]);
            Mapper.Context mapperContext = ReflectionUtil.createMapperContext(mapper, conf, taskAttemptId,
                    trackingReader, writer, committer, reporter, split);
            trackingReader.initialize(split, mapperContext);
            // no thread pool (only 1 thread specified)
            Class<? extends Mapper<?, ?, ?, ?>> mapClass = job.getMapperClass();
            mapperContext.getConfiguration().setClass(CONF_MAPREDUCE_JOB_MAP_CLASS, mapClass, Mapper.class);
            // A fresh mapper instance is created per split, using the context's configuration.
            mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils.newInstance(mapClass,
                    mapperContext.getConfiguration());
            mapper.run(mapperContext);
            trackingReader.close();
            writer.close(mapperContext);
            committer.commitTask(context);
        }
    }
    // wait till all tasks are done
    if (pool != null) {
        for (Future<Object> f : taskList) {
            f.get();
        }
        pool.shutdown();
        // Keep waiting, one day at a time, until the pool reports termination.
        while (!pool.awaitTermination(1, TimeUnit.DAYS))
            ;
        jobComplete.set(true);
    }
    // Stop the progress monitor, waiting at most one second for it to exit.
    monitor.interrupt();
    monitor.join(1000);
    // report counters
    Iterator<CounterGroup> groupIt = reporter.counters.iterator();
    while (groupIt.hasNext()) {
        CounterGroup group = groupIt.next();
        LOG.info(group.getDisplayName() + ": ");
        Iterator<Counter> counterIt = group.iterator();
        while (counterIt.hasNext()) {
            Counter counter = counterIt.next();
            LOG.info(counter.getDisplayName() + ": " + counter.getValue());
        }
    }
    LOG.info("Total execution time: " + (System.currentTimeMillis() - startTime) / 1000 + " sec");
}
From source file:com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java
License:Apache License
@Test public void testMaxHFileSizeSameRow() throws Exception { final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024)); final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024)); mConf.setInt(FijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1); final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314, TaskType.MAP, 159, 2); final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId); final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION); final FileSystem fs = outputDir.getFileSystem(mConf); final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context); writer.write(entry1, NW);//from w w w .ja v a2s . co m writer.write(entry2, NW); writer.close(context); final Path defaultDir = new Path(outputDir, mDefaultLGId.toString()); assertTrue(fs.exists(defaultDir)); final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString()); assertTrue(!fs.exists(inMemoryDir)); assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue()); assertFalse(fs.exists(new Path(defaultDir, "00001"))); mFormat.getOutputCommitter(context).commitTask(context); }