List of usage examples for org.apache.hadoop.mapreduce TaskType MAP
TaskType MAP
The examples below show how org.apache.hadoop.mapreduce.TaskType.MAP is used in open-source projects. Each entry lists the source file and license, followed by the relevant code.
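Before the project-specific examples, here is a minimal sketch of the two patterns that recur below: building a synthetic map-task attempt id (as the shim and test examples do) and querying a submitted Job for its map-task reports (as the Druid example does). The class name TaskTypeMapSketch and the helper logMapDiagnostics are hypothetical names introduced only for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskReport;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskTypeMapSketch {

    public static void main(String[] args) throws Exception {
        // Pattern 1: a synthetic map-task attempt id, typically used to drive an
        // OutputFormat or RecordWriter outside a real cluster (see the HCatalog shim,
        // Jena, and Mnemonic test examples below).
        TaskAttemptID attemptId = new TaskAttemptID("demo-jt", 0, TaskType.MAP, 0, 0);
        TaskAttemptContextImpl attemptContext =
                new TaskAttemptContextImpl(new Configuration(), attemptId);
        System.out.println("Attempt id: " + attemptContext.getTaskAttemptID());
    }

    // Pattern 2: once a job has been submitted, TaskType.MAP selects the map tasks
    // when pulling reports, e.g. to log diagnostics on failure (see the Druid example
    // below). Calling this before submission throws IllegalStateException.
    static void logMapDiagnostics(Job job) throws Exception {
        for (TaskReport report : job.getTaskReports(TaskType.MAP)) {
            System.out.println(report.getTaskID() + " -> "
                    + String.join(", ", report.getDiagnostics()));
        }
    }
}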
From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);
    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));
    setJobName(jobConf, segments);
    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }
    final Job job = Job.getInstance(jobConf);
    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);
    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);
    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();
        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
From source file:org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceJobSuggestionContext.java
License:Apache License
private MapReduceJobSuggestionContext buildContext() {
    avgMapTimeInSec = avgReduceTimeInSec = avgShuffleTimeInSec = 0;
    numMaps = jobconf.getLong(NUM_MAPS, 0);
    numReduces = jobconf.getLong(NUM_REDUCES, 0);
    for (TaskAttemptExecutionAPIEntity attempt : job.getCompletedTaskAttemptsMap().values()) {
        String taskType = getTaskType(attempt);
        if (Constants.TaskType.MAP.toString().equalsIgnoreCase(taskType)) {
            long mapTime = attempt.getEndTime() - attempt.getStartTime();
            avgMapTimeInSec += mapTime;
            if (firstMap == null || firstMap.getStartTime() > attempt.getStartTime()) {
                firstMap = attempt;
            }
            if (lastMap == null || lastMap.getEndTime() < attempt.getEndTime()) {
                lastMap = attempt;
            }
            if (worstMap == null || (worstMap.getEndTime() - worstMap.getStartTime()) < mapTime) {
                worstMap = attempt;
            }
            long tmpMem = getMinimumIOSortMemory(attempt);
            if (tmpMem > minMapSpillMemBytes) {
                minMapSpillMemBytes = tmpMem;
            }
        } else if (TaskType.REDUCE.toString().equalsIgnoreCase(taskType)) {
            long shuffleTime = attempt.getShuffleFinishTime() - attempt.getStartTime();
            avgShuffleTimeInSec += shuffleTime;
            if (firstShuffle == null || firstShuffle.getStartTime() > attempt.getStartTime()) {
                firstShuffle = attempt;
            }
            if (lastShuffle == null || lastShuffle.getShuffleFinishTime() < attempt.getShuffleFinishTime()) {
                lastShuffle = attempt;
            }
            if (worstShuffle == null
                    || (worstShuffle.getShuffleFinishTime() - worstShuffle.getStartTime()) < shuffleTime) {
                worstShuffle = attempt;
            }
            long reduceTime = attempt.getEndTime() - attempt.getShuffleFinishTime();
            avgReduceTimeInSec += reduceTime;
            if (firstReduce == null || firstReduce.getStartTime() > attempt.getStartTime()) {
                firstReduce = attempt;
            }
            if (lastReduce == null || lastReduce.getEndTime() < attempt.getEndTime()) {
                lastReduce = attempt;
            }
            if (worstReduce == null || (worstReduce.getEndTime() - worstReduce.getShuffleFinishTime()) < reduceTime) {
                worstReduce = attempt;
            }
        }
    }
    if (numMaps > 0) {
        avgMapTimeInSec = avgMapTimeInSec / numMaps / DateTimeUtil.ONESECOND;
    }
    if (numReduces > 0) {
        avgReduceTimeInSec = avgReduceTimeInSec / numReduces / DateTimeUtil.ONESECOND;
        avgShuffleTimeInSec = avgShuffleTimeInSec / numReduces / DateTimeUtil.ONESECOND;
    }
    return this;
}
From source file:org.apache.hcatalog.shims.HCatHadoopShims23.java
License:Apache License
@Override
public TaskID createTaskID() {
    return new TaskID("", 0, TaskType.MAP, 0);
}
From source file:org.apache.hcatalog.shims.HCatHadoopShims23.java
License:Apache License
@Override
public TaskAttemptID createTaskAttemptID() {
    return new TaskAttemptID("", 0, TaskType.MAP, 0, 0);
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java
License:Apache License
/**
 * @param type Task type.
 * @return Hadoop task type.
 */
private TaskType taskType(HadoopTaskType type) {
    switch (type) {
        case SETUP:
            return TaskType.JOB_SETUP;

        case MAP:
        case COMBINE:
            return TaskType.MAP;

        case REDUCE:
            return TaskType.REDUCE;

        case COMMIT:
        case ABORT:
            return TaskType.JOB_CLEANUP;

        default:
            return null;
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2TaskContext.java
License:Apache License
/**
 * @param type Task type.
 * @return Hadoop task type.
 */
private TaskType taskType(GridHadoopTaskType type) {
    switch (type) {
        case SETUP:
            return TaskType.JOB_SETUP;

        case MAP:
        case COMBINE:
            return TaskType.MAP;

        case REDUCE:
            return TaskType.REDUCE;

        case COMMIT:
        case ABORT:
            return TaskType.JOB_CLEANUP;

        default:
            return null;
    }
}
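Note on the two Ignite shims above: the COMBINE phase is translated to TaskType.MAP, presumably because in the Hadoop execution model the combiner runs inside a map task, so a Hadoop-compatible task context for Ignite's combine stage is presented as a map task; SETUP and COMMIT/ABORT likewise collapse onto the job-level JOB_SETUP and JOB_CLEANUP types.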
From source file:org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java
License:Apache License
private TaskAttemptID createAttemptID(int jobID, int taskID, int id) {
    return new TaskAttemptID("outputTest", jobID, TaskType.MAP, taskID, 1);
}
From source file:org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormatTests.java
License:Apache License
/**
 * Tests output
 *
 * @param f
 *            File to output to
 * @param num
 *            Number of tuples to output
 * @throws IOException
 * @throws InterruptedException
 */
protected final void testOutput(File f, int num) throws IOException, InterruptedException {
    // Prepare configuration
    Configuration config = this.prepareConfiguration();

    // Set up fake job
    OutputFormat<NullWritable, T> outputFormat = this.getOutputFormat();
    Job job = Job.getInstance(config);
    job.setOutputFormatClass(outputFormat.getClass());
    this.addOutputPath(f, job.getConfiguration(), job);
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertNotNull(FileOutputFormat.getOutputPath(context));

    // Output the data
    TaskAttemptID id = new TaskAttemptID("outputTest", 1, TaskType.MAP, 1, 1);
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), id);
    RecordWriter<NullWritable, T> writer = outputFormat.getRecordWriter(taskContext);
    Iterator<T> tuples = this.generateTuples(num);
    while (tuples.hasNext()) {
        writer.write(NullWritable.get(), tuples.next());
    }
    writer.close(taskContext);

    // Check output
    File outputFile = this.findOutputFile(this.folder.getRoot(), context);
    Assert.assertNotNull(outputFile);
    this.checkTuples(outputFile, num);
}
From source file:org.apache.mnemonic.mapreduce.MneMapreduceBufferDataTest.java
License:Apache License
@BeforeClass
public void setUp() throws IOException {
    m_workdir = new Path(System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
    m_conf = new JobConf();
    m_rand = Utils.createRandom();
    m_partfns = new ArrayList<String>();

    try {
        m_fs = FileSystem.getLocal(m_conf).getRaw();
        m_fs.delete(m_workdir, true);
        m_fs.mkdirs(m_workdir);
    } catch (IOException e) {
        throw new IllegalStateException("bad fs init", e);
    }

    m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

    MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
    MneConfigHelper.setBaseOutputName(m_conf, null, "buffer-data");

    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
            new DurableType[] { DurableType.BUFFER });
    MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
            new Class<?>[] {});

    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setMemPoolSize(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            1024L * 1024 * 1024 * 4);
    MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            new DurableType[] { DurableType.BUFFER });
    MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            new Class<?>[] {});
}
From source file:org.apache.mnemonic.mapreduce.MneMapreduceChunkDataTest.java
License:Apache License
@BeforeClass
public void setUp() throws Exception {
    m_workdir = new Path(System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
    m_conf = new JobConf();
    m_rand = Utils.createRandom();
    unsafe = Utils.getUnsafe();

    try {
        m_fs = FileSystem.getLocal(m_conf).getRaw();
        m_fs.delete(m_workdir, true);
        m_fs.mkdirs(m_workdir);
    } catch (IOException e) {
        throw new IllegalStateException("bad fs init", e);
    }

    m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

    MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
    MneConfigHelper.setBaseOutputName(m_conf, null, "chunk-data");

    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
            new DurableType[] { DurableType.CHUNK });
    MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
            new Class<?>[] {});

    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setMemPoolSize(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            1024L * 1024 * 1024 * 4);
    MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            new DurableType[] { DurableType.CHUNK });
    MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            new Class<?>[] {});
}