Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID.

Prototype

public TaskAttemptID getTaskAttemptID();

Document

Get the unique name for this task attempt.
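
The returned TaskAttemptID names both the task and the particular attempt, and its getTaskID() accessor yields the parent TaskID, which several of the examples below use to derive per-task names and indexes. Below is a minimal, self-contained sketch of calling the method against a locally built context; it assumes the hadoop-mapreduce-client-core classes TaskAttemptContextImpl and TaskAttemptID, and the job identifier and class name are purely illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class GetTaskAttemptIDExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Illustrative identifier values: "local-jt", job 1, map task 0, attempt 0.
        TaskAttemptID attemptId = new TaskAttemptID("local-jt", 1, TaskType.MAP, 0, 0);
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, attemptId);

        // Prints something like: attempt_local-jt_0001_m_000000_0
        System.out.println("attempt: " + context.getTaskAttemptID());
        // Parent task id, e.g. task_local-jt_0001_m_000000
        System.out.println("task:    " + context.getTaskAttemptID().getTaskID());
        // Integer task index within the job, e.g. 0
        System.out.println("index:   " + context.getTaskAttemptID().getTaskID().getId());
    }
}

In production code the framework supplies the TaskAttemptContext (for example to RecordWriter, RecordReader, and OutputFormat implementations), so the examples below simply call getTaskAttemptID() on the context they are handed.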

Usage

From source file:org.apache.solr.hadoop.SolrRecordWriter.java

License:Apache License

public SolrRecordWriter(TaskAttemptContext context, Path outputShardDir, int batchSize) {
    this.batchSize = batchSize;
    this.batch = new ArrayList(batchSize);
    Configuration conf = context.getConfiguration();

    // setLogLevel("org.apache.solr.core", "WARN");
    // setLogLevel("org.apache.solr.update", "WARN");

    heartBeater = new HeartBeater(context);
    try {
        heartBeater.needHeartBeat();

        Path solrHomeDir = SolrRecordWriter.findSolrConfig(conf);
        FileSystem fs = outputShardDir.getFileSystem(conf);
        EmbeddedSolrServer solr = createEmbeddedSolrServer(solrHomeDir, fs, outputShardDir);
        batchWriter = new BatchWriter(solr, batchSize, context.getTaskAttemptID().getTaskID(),
                SolrOutputFormat.getSolrWriterThreadCount(conf), SolrOutputFormat.getSolrWriterQueueSize(conf));

    } catch (Exception e) {
        throw new IllegalStateException(String.format("Failed to initialize record writer for %s, %s",
                context.getJobName(), conf.get("mapred.task.id")), e);
    } finally {
        heartBeater.cancelHeartBeat();
    }
}

From source file:org.apache.sqoop.manager.oracle.OraOopOutputFormatBase.java

License:Apache License

protected int getMapperId(TaskAttemptContext context) {

    return context.getTaskAttemptID().getTaskID().getId();
}

From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java

License:Apache License

private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration,
        final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass,
        final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass)
        throws Exception {

    final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration);
    final TaskAttemptContext job = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0));

    int vertexCount = 0;
    int outEdgeCount = 0;
    int inEdgeCount = 0;

    final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent()
            ? ReflectionUtils.newInstance(outFormatClass.get(), configuration)
            : null;
    final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null
            : outputFormat.getRecordWriter(job);

    boolean foundKeyValue = false;
    for (final FileSplit split : fileSplits) {
        logger.info("\treading file split {} ({}...{} bytes)", split.getPath().getName(), split.getStart(),
                split.getStart() + split.getLength());
        final RecordReader reader = inputFormat.createRecordReader(split, job);

        float lastProgress = -1f;
        while (reader.nextKeyValue()) {
            //System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue());
            final float progress = reader.getProgress();
            assertTrue(progress >= lastProgress);
            assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
            final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue();
            if (null != writer)
                writer.write(NullWritable.get(), vertexWritable);
            vertexCount++;
            outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT));
            inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN));
            //
            final Vertex vertex = vertexWritable.get();
            assertEquals(Integer.class, vertex.id().getClass());
            if (vertex.value("name").equals("SUGAR MAGNOLIA")) {
                foundKeyValue = true;
                assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT)));
                assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN)));
            }
            lastProgress = progress;
        }
    }

    assertEquals(8049, outEdgeCount);
    assertEquals(8049, inEdgeCount);
    assertEquals(outEdgeCount, inEdgeCount);
    assertEquals(808, vertexCount);
    assertTrue(foundKeyValue);

    if (null != writer) {
        writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID()));
        for (int i = 1; i < 10; i++) {
            final File outputDirectory = new File(
                    new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI());
            final List<FileSplit> splits = generateFileSplits(
                    new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/"
                            + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt") + "_0"
                            + "/part-m-00000"),
                    i);
            validateFileSplits(splits, configuration, inputFormatClass, Optional.empty());
        }
    }
}

From source file:org.broadinstitute.sting.gatk.hadoop.LociRecordReader.java

License:Open Source License

public void initialize(InputSplit spl, TaskAttemptContext ctx) throws IOException {
    final FileVirtualSplit split = (FileVirtualSplit) spl;

    file = split.getPath();
    fs = file.getFileSystem(ctx.getConfiguration());

    final FSDataInputStream in = fs.open(file);
    codec = new BAMRecordCodec(new SAMFileReader(in).getFileHeader());

    in.seek(0);
    bci = new BlockCompressedInputStream(
            new WrapSeekable<FSDataInputStream>(in, fs.getFileStatus(file).getLen(), file));

    virtualStart = split.getStartVirtualOffset();
    fileStart = virtualStart >>> 16;
    virtualEnd = split.getEndVirtualOffset();
    fileEnd = virtualEnd >>> 16;
    idx = new SplittingBAMIndex(file.getFileSystem(ctx.getConfiguration()).open(getIdxPath(file)));
    codec.setInputStream(bci);
    bci.seek(virtualStart);

    JobConf job = new JobConf(ctx.getConfiguration());
    jobDir = new String(job.getJobLocalDir());
    attemptID = ctx.getTaskAttemptID().toString();
}

From source file:org.broadinstitute.sting.gatk.hadoop.SortOutputFormat.java

License:Open Source License

@Override
public RecordWriter<NullWritable, SAMRecordWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    int pos = 0;
    if (context == null) {
        throw new IOException("context is NULL");
    }

    Path[] p = FileInputFormat.getInputPaths(context);
    assert (p.length > 0);

    FileSystem fs = p[0].getFileSystem(context.getConfiguration());
    FileStatus[] status = fs.listStatus(p[0]);

    for (int i = 0; i < status.length; i++) {
        if ((status[i].getPath().getName()).endsWith(".bam")) {
            pos = i;
            break;
        }
    }

    if (super.header == null) {
        Configuration c = context.getConfiguration();
        readSAMHeaderFrom(status[pos].getPath(), fs);
        if ((context.getConfiguration().getBoolean("gatk.hadoop.ismerge", false)) == false) {
            setWriteHeader(true);
        } else if (context.getTaskAttemptID().getTaskID().getId() == 0) {
            setWriteHeader(true);
        }
    }
    return super.getRecordWriter(context);
}

From source file:org.broadinstitute.sting.gatk.hadoop.SortOutputFormat.java

License:Open Source License

@Override
public Path getDefaultWorkFile(TaskAttemptContext context, String ext) throws IOException {
    String filename = context.getTaskAttemptID().toString();
    String extension = ext.isEmpty() ? ext : "." + ext;
    int part = context.getTaskAttemptID().getTaskID().getId();
    return new Path(super.getDefaultWorkFile(context, ext).getParent(),
            String.format("%06d", part) + "-" + filename + extension);
}

From source file:org.gbif.ocurrence.index.solr.SolrRecordWriter.java

License:Apache License

@SuppressWarnings("unchecked")
public SolrRecordWriter(TaskAttemptContext context) {
    conf = context.getConfiguration();
    batchSize = SolrOutputFormat.getBatchSize(conf);

    setLogLevel("org.apache.solr.core", "WARN");
    setLogLevel("org.apache.solr.update", "WARN");

    heartBeater = new HeartBeater(context);
    try {
        heartBeater.needHeartBeat();
        /** The actual file in hdfs that holds the configuration. */

        final String configuredSolrConfigPath = conf.get(SolrOutputFormat.SETUP_OK);
        if (configuredSolrConfigPath == null) {
            throw new IllegalStateException(
                    String.format("The job did not pass %s", SolrOutputFormat.SETUP_OK));
        }
        outputZipFile = SolrOutputFormat.isOutputZipFormat(conf);

        this.fs = FileSystem.get(conf);
        perm = new Path(FileOutputFormat.getOutputPath(context), getOutFileName(context, "part"));

        // Make a task unique name that contains the actual index output name to
        // make debugging simpler
        // Note: if using JVM reuse, the sequence number will not be reset for a
        // new task using the jvm

        temp = conf.getLocalPath("mapred.local.dir",
                "solr_" + conf.get("mapred.task.id") + '.' + sequence.incrementAndGet());

        if (outputZipFile && !perm.getName().endsWith(".zip")) {
            perm = perm.suffix(".zip");
        }
        fs.delete(perm, true); // delete old, if any
        Path local = fs.startLocalOutput(perm, temp);

        solrHome = findSolrConfig(conf);

        // }
        // Verify that the solr home has a conf and lib directory
        if (solrHome == null) {
            throw new IOException("Unable to find solr home setting");
        }

        // Setup a solr instance that we can batch writes to
        LOG.info("SolrHome: " + solrHome.toUri());
        String dataDir = new File(local.toString(), "data").toString();
        // copy the schema to the conf dir
        File confDir = new File(local.toString(), "conf");
        confDir.mkdirs();
        File srcSchemaFile = new File(solrHome.toString(), "conf/schema.xml");
        assert srcSchemaFile.exists();
        FileUtils.copyFile(srcSchemaFile, new File(confDir, "schema.xml"));
        Properties props = new Properties();
        props.setProperty("solr.data.dir", dataDir);
        props.setProperty("solr.home", solrHome.toString());
        SolrResourceLoader loader = new SolrResourceLoader(solrHome.toString(), null, props);
        LOG.info(String.format(
                "Constructed instance information solr.home %s (%s), instance dir %s, conf dir %s, writing index to temporary directory %s, with permdir %s",
                solrHome, solrHome.toUri(), loader.getInstanceDir(), loader.getConfigDir(), dataDir, perm));
        CoreContainer container = new CoreContainer(loader);
        CoreDescriptor descr = new CoreDescriptor(container, "core1", solrHome.toString());
        descr.setDataDir(dataDir);
        descr.setCoreProperties(props);
        core = container.create(descr);
        container.register(core, false);
        solr = new EmbeddedSolrServer(container, "core1");
        batchWriter = new BatchWriter(solr, batchSize, context.getTaskAttemptID().getTaskID(),
                SolrOutputFormat.getSolrWriterThreadCount(conf), SolrOutputFormat.getSolrWriterQueueSize(conf));

        // instantiate the converter
        String className = SolrDocumentConverter.getSolrDocumentConverter(conf);
        Class<? extends SolrDocumentConverter> cls = (Class<? extends SolrDocumentConverter>) Class
                .forName(className);
        converter = (SolrDocumentConverter<K, V>) ReflectionUtils.newInstance(cls, conf);
    } catch (Exception e) {
        throw new IllegalStateException(String.format("Failed to initialize record writer for %s, %s",
                context.getJobName(), conf.get("mapred.task.id")), e);
    } finally {
        heartBeater.cancelHeartBeat();
    }
}

From source file:org.kiji.avro.mapreduce.TestAvroKeyOutputFormat.java

License:Apache License

/**
 * Tests that the record writer is constructed and returned correctly from the output format.
 */
private void testGetRecordWriter(Configuration conf, CodecFactory expectedCodec) throws IOException {
    // Configure a mock task attempt context.
    Job job = new Job(conf);
    job.getConfiguration().set("mapred.output.dir", mTempDir.getRoot().getPath());
    Schema writerSchema = Schema.create(Schema.Type.INT);
    AvroJob.setOutputKeySchema(job, writerSchema);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(job.getConfiguration()).anyTimes();
    expect(context.getTaskAttemptID()).andReturn(new TaskAttemptID("id", 1, true, 1, 1)).anyTimes();

    // Create a mock record writer.
    @SuppressWarnings("unchecked")
    RecordWriter<AvroKey<Integer>, NullWritable> expectedRecordWriter = createMock(RecordWriter.class);
    AvroKeyOutputFormat.RecordWriterFactory recordWriterFactory = createMock(
            AvroKeyOutputFormat.RecordWriterFactory.class);

    // Expect the record writer factory to be called with appropriate parameters.
    Capture<CodecFactory> capturedCodecFactory = new Capture<CodecFactory>();
    expect(recordWriterFactory.create(eq(writerSchema), capture(capturedCodecFactory), // Capture for comparison later.
            anyObject(OutputStream.class))).andReturn(expectedRecordWriter);

    replay(context);
    replay(expectedRecordWriter);
    replay(recordWriterFactory);

    AvroKeyOutputFormat<Integer> outputFormat = new AvroKeyOutputFormat<Integer>(recordWriterFactory);
    RecordWriter<AvroKey<Integer>, NullWritable> recordWriter = outputFormat.getRecordWriter(context);
    // Make sure the expected codec was used.
    assertTrue(capturedCodecFactory.hasCaptured());
    assertEquals(expectedCodec.toString(), capturedCodecFactory.getValue().toString());

    verify(context);
    verify(expectedRecordWriter);
    verify(recordWriterFactory);

    assertNotNull(recordWriter);
    assertTrue(expectedRecordWriter == recordWriter);
}

From source file:org.kitesdk.data.mapreduce.DatasetKeyOutputFormat.java

License:Apache License

private static String getTaskAttemptDatasetName(TaskAttemptContext taskContext) {
    return taskContext.getTaskAttemptID().toString();
}

From source file:org.mrgeo.hadoop.multipleoutputs.DirectoryMultipleOutputs.java

License:Apache License

static TaskAttemptContext _getContext(final TaskAttemptContext context, final String nameOutput)
        throws IOException {
    TaskAttemptContext taskContext;

    // The following trick leverages the instantiation of a record writer via
    // the job thus supporting arbitrary output formats; it also bypasses
    // the lack of the set method we want on Job here.
    final Configuration clonedConfiguration = new Configuration(context.getConfiguration());
    String path = getHdfsPath(context, nameOutput);
    clonedConfiguration.set("mapred.output.dir", path);

    final Job job = new Job(clonedConfiguration);
    job.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput));
    job.setOutputKeyClass(getNamedOutputKeyClass(context, nameOutput));
    job.setOutputValueClass(getNamedOutputValueClass(context, nameOutput));

    taskContext = HadoopUtils.createTaskAttemptContext(job.getConfiguration(), context.getTaskAttemptID());
    return taskContext;
}