Example usage for org.apache.hadoop.mapreduce TaskType MAP

List of usage examples for org.apache.hadoop.mapreduce TaskType MAP

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce TaskType MAP.

Prototype

TaskType MAP

To view the source code for org.apache.hadoop.mapreduce TaskType MAP, click the Source Link.
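
Before the full examples below, here is a minimal, self-contained sketch (not taken from any of the projects listed under Usage) of the pattern most of them share: building a TaskAttemptID for a map task with TaskType.MAP and wrapping it in a TaskAttemptContextImpl. It assumes the Hadoop 2.x mapreduce API; the jtIdentifier and numeric ids ("localJT", 1, 0, 0) are placeholder values chosen for illustration, not values required by the API.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskTypeMapExample {
    public static void main(String[] args) {
        // Identify attempt 0 of map task 0 of job 1: (jtIdentifier, jobId, type, taskId, attemptId).
        TaskAttemptID taskAttemptId = new TaskAttemptID("localJT", 1, TaskType.MAP, 0, 0);

        // Many of the examples below wrap the id in a TaskAttemptContext to drive
        // an InputFormat or OutputFormat outside of a running MapReduce job.
        Configuration conf = new Configuration();
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskAttemptId);

        System.out.println(context.getTaskAttemptID()); // e.g. attempt_localJT_0001_m_000000_0
    }
}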

Usage

From source file: org.apache.tez.mapreduce.input.base.MRInputBase.java

License: Apache License

public List<Event> initialize() throws IOException {
    getContext().requestInitialMemory(0L, null); // mandatory call
    MRRuntimeProtos.MRInputUserPayloadProto mrUserPayload = MRInputHelpers
            .parseMRInputPayload(getContext().getUserPayload());
    boolean isGrouped = mrUserPayload.getGroupingEnabled();
    Preconditions.checkArgument(!mrUserPayload.hasSplits(),
            "Split information not expected in " + this.getClass().getName());
    Configuration conf = TezUtils.createConfFromByteString(mrUserPayload.getConfigurationBytes());
    this.jobConf = new JobConf(conf);
    useNewApi = this.jobConf.getUseNewMapper();
    if (isGrouped) {
        if (useNewApi) {
            jobConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR,
                    org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat.class.getName());
        } else {
            jobConf.set("mapred.input.format.class",
                    org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.class.getName());
        }
    }

    // Add tokens to the jobConf - in case they are accessed within the RecordReader / InputFormat
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

    TaskAttemptID taskAttemptId = new TaskAttemptID(
            new TaskID(Long.toString(getContext().getApplicationId().getClusterTimestamp()),
                    getContext().getApplicationId().getId(), TaskType.MAP, getContext().getTaskIndex()),
            getContext().getTaskAttemptNumber());

    jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());

    this.inputRecordCounter = getContext().getCounters().findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);

    return null;
}

From source file: org.apache.tez.mapreduce.processor.MRTask.java

License: Apache License

@Override
public void initialize() throws IOException, InterruptedException {

    DeprecatedKeys.init();

    processorContext = getContext();
    counters = processorContext.getCounters();
    this.taskAttemptId = new TaskAttemptID(
            new TaskID(Long.toString(processorContext.getApplicationId().getClusterTimestamp()),
                    processorContext.getApplicationId().getId(), (isMap ? TaskType.MAP : TaskType.REDUCE),
                    processorContext.getTaskIndex()),
            processorContext.getTaskAttemptNumber());

    UserPayload userPayload = processorContext.getUserPayload();
    Configuration conf = TezUtils.createConfFromUserPayload(userPayload);
    if (conf instanceof JobConf) {
        this.jobConf = (JobConf) conf;
    } else {
        this.jobConf = new JobConf(conf);
    }
    jobConf.set(Constants.TEZ_RUNTIME_TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, processorContext.getDAGAttemptNumber());

    LOG.info("MRTask.inited: taskAttemptId = " + taskAttemptId.toString());

    // TODO Post MRR
    // A single file per vertex will likely be a better solution. Does not
    // require translation - client can take care of this. Will work independent
    // of whether the configuration is for intermediate tasks or not. Has the
    // overhead of localizing multiple files per job - i.e. the client would
    // need to write these files to hdfs, add them as local resources per
    // vertex. A solution like this may be more practical once it's possible to
    // submit configuration parameters to the AM and effectively tasks via RPC.

    jobConf.set(MRJobConfig.VERTEX_NAME, processorContext.getTaskVertexName());

    if (LOG.isDebugEnabled() && userPayload != null) {
        Iterator<Entry<String, String>> iter = jobConf.iterator();
        String taskIdStr = taskAttemptId.getTaskID().toString();
        while (iter.hasNext()) {
            Entry<String, String> confEntry = iter.next();
            LOG.debug("TaskConf Entry" + ", taskId=" + taskIdStr + ", key=" + confEntry.getKey() + ", value="
                    + confEntry.getValue());
        }
    }

    configureMRTask();
}

From source file: org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java

License: Apache License

private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration,
        final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass,
        final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass)
        throws Exception {

    final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration);
    final TaskAttemptContext job = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0));

    int vertexCount = 0;
    int outEdgeCount = 0;
    int inEdgeCount = 0;

    final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent()
            ? ReflectionUtils.newInstance(outFormatClass.get(), configuration)
            : null;
    final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null
            : outputFormat.getRecordWriter(job);

    boolean foundKeyValue = false;
    for (final FileSplit split : fileSplits) {
        logger.info("\treading file split {}", split.getPath().getName() + " ({}",
                split.getStart() + "..." + (split.getStart() + split.getLength()), "{} {} bytes)");
        final RecordReader reader = inputFormat.createRecordReader(split, job);

        float lastProgress = -1f;
        while (reader.nextKeyValue()) {
            //System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue());
            final float progress = reader.getProgress();
            assertTrue(progress >= lastProgress);
            assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
            final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue();
            if (null != writer)
                writer.write(NullWritable.get(), vertexWritable);
            vertexCount++;
            outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT));
            inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN));
            //
            final Vertex vertex = vertexWritable.get();
            assertEquals(Integer.class, vertex.id().getClass());
            if (vertex.value("name").equals("SUGAR MAGNOLIA")) {
                foundKeyValue = true;
                assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT)));
                assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN)));
            }
            lastProgress = progress;
        }
    }

    assertEquals(8049, outEdgeCount);
    assertEquals(8049, inEdgeCount);
    assertEquals(outEdgeCount, inEdgeCount);
    assertEquals(808, vertexCount);
    assertTrue(foundKeyValue);

    if (null != writer) {
        writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID()));
        for (int i = 1; i < 10; i++) {
            final File outputDirectory = new File(
                    new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI());
            final List<FileSplit> splits = generateFileSplits(
                    new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/"
                            + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt") + "_0"
                            + "/part-m-00000"),
                    i);
            validateFileSplits(splits, configuration, inputFormatClass, Optional.empty());
        }
    }
}

From source file: org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java

License: Apache License

@Test
public void testMaxHFileSizeSameRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java

License: Apache License

@Test
public void testMaxHFileSizeNewRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue());
    assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00002")));

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java

License: Apache License

@Test
public void testMultipleLayouts() throws Exception {
    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);

    final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    writer.write(defaultEntry, NW);
    final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024));
    writer.write(inMemoryEntry, NW);

    try {
        // Test with an invalid locality group ID:
        final ColumnId invalid = new ColumnId(1234);
        assertFalse(mLayout.getLocalityGroupIdNameMap().containsKey(invalid));
        writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW);
        fail("Output format did not fail on unknown locality group IDs.");
    } catch (IllegalArgumentException iae) {
        LOG.info("Expected error: " + iae);
    }

    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue());
    assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue());

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: org.kiji.mapreduce.platform.Hadoop1xKijiMRBridge.java

License: Apache License

/** {@inheritDoc} */
@Override
public TaskAttemptID newTaskAttemptID(String jtIdentifier, int jobId, TaskType type, int taskId, int id) {
    // In Hadoop 1.0, TaskType isn't an arg to TaskAttemptID; instead, there's just a
    // boolean indicating whether it's a map task or not.
    boolean isMap = type == TaskType.MAP;
    return new TaskAttemptID(jtIdentifier, jobId, isMap, taskId, id);
}

From source file: org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetOutputFormat.java

License: Apache License

@Override
public IPentahoRecordWriter createRecordWriter() throws Exception {
    if (outputFile == null) {
        throw new RuntimeException("Output file is not defined");
    }
    if ((outputFields == null) || (outputFields.size() == 0)) {
        throw new RuntimeException("Schema is not defined");
    }

    return inClassloader(() -> {
        FixedParquetOutputFormat nativeParquetOutputFormat = new FixedParquetOutputFormat(
                new PentahoParquetWriteSupport(outputFields));

        TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
        TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
        try {

            ParquetRecordWriter<RowMetaAndData> recordWriter = (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat
                    .getRecordWriter(task);
            return new PentahoParquetRecordWriter(recordWriter, task);
        } catch (IOException e) {
            throw new RuntimeException("Some error accessing parquet files", e);
        } catch (InterruptedException e) {
            // logging here
            e.printStackTrace();
            throw new RuntimeException("This should never happen " + e);
        }
    });
}

From source file: org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetRecordWriterTest.java

License: Apache License

@Before
public void setUp() throws Exception {

    ConfigurationProxy conf = new ConfigurationProxy();
    conf.set("fs.defaultFS", "file:///");
    Job job = Job.getInstance(conf);

    tempFile = Files.createTempDirectory("parquet");

    org.apache.hadoop.fs.Path outputFile = new org.apache.hadoop.fs.Path(tempFile + PARQUET_FILE_NAME);

    ParquetOutputFormat.setOutputPath(job, outputFile.getParent());

    TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);

    task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
}

From source file: org.pentaho.hadoop.shim.common.format.PentahoParquetOutputFormat.java

License: Apache License

@Override
public IPentahoRecordWriter createRecordWriter() throws Exception {
    if (outputFile == null) {
        throw new RuntimeException("Output file is not defined");
    }
    if (schema == null) {
        throw new RuntimeException("Schema is not defined");
    }

    return inClassloader(() -> {
        FixedParquetOutputFormat nativeParquetOutputFormat = new FixedParquetOutputFormat(
                new PentahoParquetWriteSupport(schema));

        TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
        TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
        try {

            ParquetRecordWriter<RowMetaAndData> recordWriter = (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat
                    .getRecordWriter(task);
            return new PentahoParquetRecordWriter(recordWriter, task);
        } catch (IOException e) {
            throw new RuntimeException("Some error accessing parquet files", e);
        } catch (InterruptedException e) {
            // logging here
            e.printStackTrace();
            throw new RuntimeException("This should never happen " + e);
        }
    });
}