List of usage examples for org.apache.hadoop.mapreduce.TaskType.MAP

TaskType.MAP is the enum constant that identifies a map task in the Hadoop MapReduce API. The examples below show how real projects use it, most often to construct a TaskID or TaskAttemptID for a map task.
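Before the project examples, here is a minimal, self-contained sketch of the recurring pattern: building a TaskID and TaskAttemptID for a map task. The class name, job tracker identifier ("jt"), job ID, and attempt numbers are arbitrary placeholders, not taken from any of the projects below.

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskTypeMapExample {
    public static void main(String[] args) {
        // Identify the first map task of a job; "jt" and 1234 are placeholder values.
        TaskID taskId = new TaskID("jt", 1234, TaskType.MAP, 0);

        // Identify attempt 0 of that map task.
        TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);

        // Prints an ID of the form attempt_jt_1234_m_000000_0 ("m" marks a map task).
        System.out.println(attemptId);
    }
}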
From source file:org.apache.tez.mapreduce.input.base.MRInputBase.java
License:Apache License
public List<Event> initialize() throws IOException {
    getContext().requestInitialMemory(0L, null); // mandatory call
    MRRuntimeProtos.MRInputUserPayloadProto mrUserPayload = MRInputHelpers
            .parseMRInputPayload(getContext().getUserPayload());
    boolean isGrouped = mrUserPayload.getGroupingEnabled();
    Preconditions.checkArgument(mrUserPayload.hasSplits() == false,
            "Split information not expected in " + this.getClass().getName());
    Configuration conf = TezUtils.createConfFromByteString(mrUserPayload.getConfigurationBytes());
    this.jobConf = new JobConf(conf);
    useNewApi = this.jobConf.getUseNewMapper();
    if (isGrouped) {
        if (useNewApi) {
            jobConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR,
                    org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat.class.getName());
        } else {
            jobConf.set("mapred.input.format.class",
                    org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.class.getName());
        }
    }

    // Add tokens to the jobConf - in case they are accessed within the RR / IF
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

    TaskAttemptID taskAttemptId = new TaskAttemptID(
            new TaskID(Long.toString(getContext().getApplicationId().getClusterTimestamp()),
                    getContext().getApplicationId().getId(), TaskType.MAP, getContext().getTaskIndex()),
            getContext().getTaskAttemptNumber());

    jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());

    this.inputRecordCounter = getContext().getCounters().findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);

    return null;
}
From source file:org.apache.tez.mapreduce.processor.MRTask.java
License:Apache License
@Override
public void initialize() throws IOException, InterruptedException {
    DeprecatedKeys.init();
    processorContext = getContext();
    counters = processorContext.getCounters();
    this.taskAttemptId = new TaskAttemptID(
            new TaskID(Long.toString(processorContext.getApplicationId().getClusterTimestamp()),
                    processorContext.getApplicationId().getId(),
                    (isMap ? TaskType.MAP : TaskType.REDUCE), processorContext.getTaskIndex()),
            processorContext.getTaskAttemptNumber());
    UserPayload userPayload = processorContext.getUserPayload();
    Configuration conf = TezUtils.createConfFromUserPayload(userPayload);
    if (conf instanceof JobConf) {
        this.jobConf = (JobConf) conf;
    } else {
        this.jobConf = new JobConf(conf);
    }
    jobConf.set(Constants.TEZ_RUNTIME_TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, processorContext.getDAGAttemptNumber());

    LOG.info("MRTask.inited: taskAttemptId = " + taskAttemptId.toString());

    // TODO Post MRR
    // A single file per vertex will likely be a better solution. Does not
    // require translation - client can take care of this. Will work independent
    // of whether the configuration is for intermediate tasks or not. Has the
    // overhead of localizing multiple files per job - i.e. the client would
    // need to write these files to hdfs, add them as local resources per
    // vertex. A solution like this may be more practical once it's possible to
    // submit configuration parameters to the AM and effectively tasks via RPC.

    jobConf.set(MRJobConfig.VERTEX_NAME, processorContext.getTaskVertexName());

    if (LOG.isDebugEnabled() && userPayload != null) {
        Iterator<Entry<String, String>> iter = jobConf.iterator();
        String taskIdStr = taskAttemptId.getTaskID().toString();
        while (iter.hasNext()) {
            Entry<String, String> confEntry = iter.next();
            LOG.debug("TaskConf Entry" + ", taskId=" + taskIdStr + ", key=" + confEntry.getKey()
                    + ", value=" + confEntry.getValue());
        }
    }

    configureMRTask();
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java
License:Apache License
private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration,
        final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass,
        final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass)
        throws Exception {

    final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration);
    final TaskAttemptContext job = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0));

    int vertexCount = 0;
    int outEdgeCount = 0;
    int inEdgeCount = 0;

    final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent()
            ? ReflectionUtils.newInstance(outFormatClass.get(), configuration)
            : null;
    final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null
            : outputFormat.getRecordWriter(job);

    boolean foundKeyValue = false;
    for (final FileSplit split : fileSplits) {
        logger.info("\treading file split {}", split.getPath().getName() + " ({}",
                split.getStart() + "..." + (split.getStart() + split.getLength()), "{} {} bytes)");
        final RecordReader reader = inputFormat.createRecordReader(split, job);
        float lastProgress = -1f;
        while (reader.nextKeyValue()) {
            // System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue());
            final float progress = reader.getProgress();
            assertTrue(progress >= lastProgress);
            assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
            final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue();
            if (null != writer)
                writer.write(NullWritable.get(), vertexWritable);
            vertexCount++;
            outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT));
            inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN));
            final Vertex vertex = vertexWritable.get();
            assertEquals(Integer.class, vertex.id().getClass());
            if (vertex.value("name").equals("SUGAR MAGNOLIA")) {
                foundKeyValue = true;
                assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT)));
                assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN)));
            }
            lastProgress = progress;
        }
    }

    assertEquals(8049, outEdgeCount);
    assertEquals(8049, inEdgeCount);
    assertEquals(outEdgeCount, inEdgeCount);
    assertEquals(808, vertexCount);
    assertTrue(foundKeyValue);

    if (null != writer) {
        writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID()));
        for (int i = 1; i < 10; i++) {
            final File outputDirectory = new File(
                    new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI());
            final List<FileSplit> splits = generateFileSplits(
                    new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/"
                            + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt")
                            + "_0" + "/part-m-00000"),
                    i);
            validateFileSplits(splits, configuration, inputFormatClass, Optional.empty());
        }
    }
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMaxHFileSizeSameRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(!fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMaxHFileSizeNewRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue());
    assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00002")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMultipleLayouts() throws Exception {
    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);

    final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    writer.write(defaultEntry, NW);
    final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024));
    writer.write(inMemoryEntry, NW);

    try {
        // Test with an invalid locality group ID:
        final ColumnId invalid = new ColumnId(1234);
        assertTrue(!mLayout.getLocalityGroupIdNameMap().containsKey(invalid));
        writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW);
        fail("Output format did not fail on unknown locality group IDs.");
    } catch (IllegalArgumentException iae) {
        LOG.info("Expected error: " + iae);
    }

    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue());
    assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue());

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.platform.Hadoop1xKijiMRBridge.java
License:Apache License
/** {@inheritDoc} */
@Override
public TaskAttemptID newTaskAttemptID(String jtIdentifier, int jobId, TaskType type, int taskId, int id) {
    // In Hadoop 1.0, TaskType isn't an arg to TaskAttemptID; instead, there's just a
    // boolean indicating whether it's a map task or not.
    boolean isMap = type == TaskType.MAP;
    return new TaskAttemptID(jtIdentifier, jobId, isMap, taskId, id);
}
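For comparison, a hypothetical Hadoop 2.x counterpart of this bridge method could pass the TaskType through directly, since the org.apache.hadoop.mapreduce.TaskAttemptID class there offers a constructor that accepts a TaskType (the same constructor used in the test examples on this page). This is an illustrative sketch, not the project's actual Hadoop 2.x bridge code.

public TaskAttemptID newTaskAttemptID(String jtIdentifier, int jobId, TaskType type, int taskId, int id) {
    // Hadoop 2.x: TaskType (e.g. TaskType.MAP) is accepted directly, no boolean conversion needed.
    return new TaskAttemptID(jtIdentifier, jobId, type, taskId, id);
}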
From source file:org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetOutputFormat.java
License:Apache License
@Override
public IPentahoRecordWriter createRecordWriter() throws Exception {
    if (outputFile == null) {
        throw new RuntimeException("Output file is not defined");
    }
    if ((outputFields == null) || (outputFields.size() == 0)) {
        throw new RuntimeException("Schema is not defined");
    }

    return inClassloader(() -> {
        FixedParquetOutputFormat nativeParquetOutputFormat = new FixedParquetOutputFormat(
                new PentahoParquetWriteSupport(outputFields));

        TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
        TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
        try {
            ParquetRecordWriter<RowMetaAndData> recordWriter =
                    (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter(task);
            return new PentahoParquetRecordWriter(recordWriter, task);
        } catch (IOException e) {
            throw new RuntimeException("Some error accessing parquet files", e);
        } catch (InterruptedException e) {
            // logging here
            e.printStackTrace();
            throw new RuntimeException("This should never happen " + e);
        }
    });
}
From source file:org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetRecordWriterTest.java
License:Apache License
@Before
public void setUp() throws Exception {
    ConfigurationProxy conf = new ConfigurationProxy();
    conf.set("fs.defaultFS", "file:///");
    Job job = Job.getInstance(conf);

    tempFile = Files.createTempDirectory("parquet");
    org.apache.hadoop.fs.Path outputFile = new org.apache.hadoop.fs.Path(tempFile + PARQUET_FILE_NAME);
    ParquetOutputFormat.setOutputPath(job, outputFile.getParent());

    TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
    task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
}
From source file:org.pentaho.hadoop.shim.common.format.PentahoParquetOutputFormat.java
License:Apache License
@Override
public IPentahoRecordWriter createRecordWriter() throws Exception {
    if (outputFile == null) {
        throw new RuntimeException("Output file is not defined");
    }
    if (schema == null) {
        throw new RuntimeException("Schema is not defined");
    }

    return inClassloader(() -> {
        FixedParquetOutputFormat nativeParquetOutputFormat = new FixedParquetOutputFormat(
                new PentahoParquetWriteSupport(schema));

        TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
        TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
        try {
            ParquetRecordWriter<RowMetaAndData> recordWriter =
                    (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter(task);
            return new PentahoParquetRecordWriter(recordWriter, task);
        } catch (IOException e) {
            throw new RuntimeException("Some error accessing parquet files", e);
        } catch (InterruptedException e) {
            // logging here
            e.printStackTrace();
            throw new RuntimeException("This should never happen " + e);
        }
    });
}