List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID
public TaskAttemptID getTaskAttemptID();
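Before the collected examples, here is a minimal sketch of the pattern most of them share: the TaskAttemptContext passed to a RecordWriter, RecordReader, or OutputFormat is asked for its TaskAttemptID, which is then drilled into via getTaskID()/getId() to build task-unique names. This sketch is not taken from any of the projects below; the AttemptNamedOutputFormat class and its file-naming scheme are illustrative assumptions.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Hypothetical OutputFormat that names its work file after the task attempt.
public class AttemptNamedOutputFormat extends TextOutputFormat<NullWritable, Text> {

    @Override
    public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
        // Full attempt id, e.g. attempt_..._m_000003_0
        TaskAttemptID attemptId = context.getTaskAttemptID();
        // Numeric index of the task within the job, useful as a partition number
        int partition = attemptId.getTaskID().getId();
        String name = String.format("part-%05d-%s%s", partition, attemptId, extension);
        // Reuse the parent work directory chosen by FileOutputFormat, only the file name changes
        return new Path(super.getDefaultWorkFile(context, extension).getParent(), name);
    }
}

The real-world examples below use the same call in three roles: deriving a task/partition id, labelling temporary or per-task output paths, and passing the TaskID to downstream writers.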
From source file:org.apache.solr.hadoop.SolrRecordWriter.java
License:Apache License
public SolrRecordWriter(TaskAttemptContext context, Path outputShardDir, int batchSize) {
    this.batchSize = batchSize;
    this.batch = new ArrayList(batchSize);
    Configuration conf = context.getConfiguration();

    // setLogLevel("org.apache.solr.core", "WARN");
    // setLogLevel("org.apache.solr.update", "WARN");

    heartBeater = new HeartBeater(context);
    try {
        heartBeater.needHeartBeat();

        Path solrHomeDir = SolrRecordWriter.findSolrConfig(conf);
        FileSystem fs = outputShardDir.getFileSystem(conf);
        EmbeddedSolrServer solr = createEmbeddedSolrServer(solrHomeDir, fs, outputShardDir);
        batchWriter = new BatchWriter(solr, batchSize, context.getTaskAttemptID().getTaskID(),
                SolrOutputFormat.getSolrWriterThreadCount(conf), SolrOutputFormat.getSolrWriterQueueSize(conf));
    } catch (Exception e) {
        throw new IllegalStateException(String.format("Failed to initialize record writer for %s, %s",
                context.getJobName(), conf.get("mapred.task.id")), e);
    } finally {
        heartBeater.cancelHeartBeat();
    }
}
From source file:org.apache.sqoop.manager.oracle.OraOopOutputFormatBase.java
License:Apache License
protected int getMapperId(TaskAttemptContext context) {
    return context.getTaskAttemptID().getTaskID().getId();
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java
License:Apache License
private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration,
        final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass,
        final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass)
        throws Exception {

    final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration);
    final TaskAttemptContext job = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0));

    int vertexCount = 0;
    int outEdgeCount = 0;
    int inEdgeCount = 0;

    final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent()
            ? ReflectionUtils.newInstance(outFormatClass.get(), configuration)
            : null;
    final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null
            : outputFormat.getRecordWriter(job);

    boolean foundKeyValue = false;
    for (final FileSplit split : fileSplits) {
        logger.info("\treading file split {}", split.getPath().getName() + " ({}",
                split.getStart() + "..." + (split.getStart() + split.getLength()), "{} {} bytes)");
        final RecordReader reader = inputFormat.createRecordReader(split, job);
        float lastProgress = -1f;
        while (reader.nextKeyValue()) {
            //System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue());
            final float progress = reader.getProgress();
            assertTrue(progress >= lastProgress);
            assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
            final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue();
            if (null != writer)
                writer.write(NullWritable.get(), vertexWritable);
            vertexCount++;
            outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT));
            inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN));
            //
            final Vertex vertex = vertexWritable.get();
            assertEquals(Integer.class, vertex.id().getClass());
            if (vertex.value("name").equals("SUGAR MAGNOLIA")) {
                foundKeyValue = true;
                assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT)));
                assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN)));
            }
            lastProgress = progress;
        }
    }

    assertEquals(8049, outEdgeCount);
    assertEquals(8049, inEdgeCount);
    assertEquals(outEdgeCount, inEdgeCount);
    assertEquals(808, vertexCount);
    assertTrue(foundKeyValue);

    if (null != writer) {
        writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID()));
        for (int i = 1; i < 10; i++) {
            final File outputDirectory = new File(
                    new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI());
            final List<FileSplit> splits = generateFileSplits(
                    new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/"
                            + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt") + "_0"
                            + "/part-m-00000"),
                    i);
            validateFileSplits(splits, configuration, inputFormatClass, Optional.empty());
        }
    }
}
From source file:org.broadinstitute.sting.gatk.hadoop.LociRecordReader.java
License:Open Source License
public void initialize(InputSplit spl, TaskAttemptContext ctx) throws IOException {
    final FileVirtualSplit split = (FileVirtualSplit) spl;

    file = split.getPath();
    fs = file.getFileSystem(ctx.getConfiguration());

    final FSDataInputStream in = fs.open(file);
    codec = new BAMRecordCodec(new SAMFileReader(in).getFileHeader());
    in.seek(0);
    bci = new BlockCompressedInputStream(
            new WrapSeekable<FSDataInputStream>(in, fs.getFileStatus(file).getLen(), file));

    virtualStart = split.getStartVirtualOffset();
    fileStart = virtualStart >>> 16;
    virtualEnd = split.getEndVirtualOffset();
    fileEnd = virtualEnd >>> 16;

    idx = new SplittingBAMIndex(file.getFileSystem(ctx.getConfiguration()).open(getIdxPath(file)));

    codec.setInputStream(bci);
    bci.seek(virtualStart);

    JobConf job = new JobConf(ctx.getConfiguration());
    jobDir = new String(job.getJobLocalDir());
    attemptID = ctx.getTaskAttemptID().toString();
}
From source file:org.broadinstitute.sting.gatk.hadoop.SortOutputFormat.java
License:Open Source License
@Override
public RecordWriter<NullWritable, SAMRecordWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    int pos = 0;

    if (context == null) {
        throw new IOException("context is NULL");
    }

    Path[] p = FileInputFormat.getInputPaths(context);
    assert (p.length > 0);
    FileSystem fs = p[0].getFileSystem(context.getConfiguration());

    FileStatus[] status = fs.listStatus(p[0]);
    for (int i = 0; i < status.length; i++) {
        if ((status[i].getPath().getName()).endsWith(".bam")) {
            pos = i;
            break;
        }
    }

    if (super.header == null) {
        Configuration c = context.getConfiguration();
        readSAMHeaderFrom(status[pos].getPath(), fs);
        if ((context.getConfiguration().getBoolean("gatk.hadoop.ismerge", false)) == false) {
            setWriteHeader(true);
        } else if (context.getTaskAttemptID().getTaskID().getId() == 0) {
            setWriteHeader(true);
        }
    }
    return super.getRecordWriter(context);
}
From source file:org.broadinstitute.sting.gatk.hadoop.SortOutputFormat.java
License:Open Source License
@Override
public Path getDefaultWorkFile(TaskAttemptContext context, String ext) throws IOException {
    String filename = context.getTaskAttemptID().toString();
    String extension = ext.isEmpty() ? ext : "." + ext;
    int part = context.getTaskAttemptID().getTaskID().getId();
    return new Path(super.getDefaultWorkFile(context, ext).getParent(),
            String.format("%06d", part) + "-" + filename + extension);
}
From source file:org.gbif.ocurrence.index.solr.SolrRecordWriter.java
License:Apache License
@SuppressWarnings("unchecked")
public SolrRecordWriter(TaskAttemptContext context) {
    conf = context.getConfiguration();
    batchSize = SolrOutputFormat.getBatchSize(conf);
    setLogLevel("org.apache.solr.core", "WARN");
    setLogLevel("org.apache.solr.update", "WARN");

    heartBeater = new HeartBeater(context);
    try {
        heartBeater.needHeartBeat();

        /** The actual file in hdfs that holds the configuration. */
        final String configuredSolrConfigPath = conf.get(SolrOutputFormat.SETUP_OK);
        if (configuredSolrConfigPath == null) {
            throw new IllegalStateException(
                    String.format("The job did not pass %s", SolrOutputFormat.SETUP_OK));
        }
        outputZipFile = SolrOutputFormat.isOutputZipFormat(conf);

        this.fs = FileSystem.get(conf);
        perm = new Path(FileOutputFormat.getOutputPath(context), getOutFileName(context, "part"));

        // Make a task unique name that contains the actual index output name to
        // make debugging simpler
        // Note: if using JVM reuse, the sequence number will not be reset for a
        // new task using the jvm
        temp = conf.getLocalPath("mapred.local.dir",
                "solr_" + conf.get("mapred.task.id") + '.' + sequence.incrementAndGet());

        if (outputZipFile && !perm.getName().endsWith(".zip")) {
            perm = perm.suffix(".zip");
        }
        fs.delete(perm, true); // delete old, if any
        Path local = fs.startLocalOutput(perm, temp);

        solrHome = findSolrConfig(conf);
        // }
        // Verify that the solr home has a conf and lib directory
        if (solrHome == null) {
            throw new IOException("Unable to find solr home setting");
        }

        // Setup a solr instance that we can batch writes to
        LOG.info("SolrHome: " + solrHome.toUri());
        String dataDir = new File(local.toString(), "data").toString();

        // copy the schema to the conf dir
        File confDir = new File(local.toString(), "conf");
        confDir.mkdirs();
        File srcSchemaFile = new File(solrHome.toString(), "conf/schema.xml");
        assert srcSchemaFile.exists();
        FileUtils.copyFile(srcSchemaFile, new File(confDir, "schema.xml"));

        Properties props = new Properties();
        props.setProperty("solr.data.dir", dataDir);
        props.setProperty("solr.home", solrHome.toString());
        SolrResourceLoader loader = new SolrResourceLoader(solrHome.toString(), null, props);
        LOG.info(String.format(
                "Constructed instance information solr.home %s (%s), instance dir %s, conf dir %s, writing index to temporary directory %s, with permdir %s",
                solrHome, solrHome.toUri(), loader.getInstanceDir(), loader.getConfigDir(), dataDir, perm));

        CoreContainer container = new CoreContainer(loader);
        CoreDescriptor descr = new CoreDescriptor(container, "core1", solrHome.toString());
        descr.setDataDir(dataDir);
        descr.setCoreProperties(props);
        core = container.create(descr);
        container.register(core, false);
        solr = new EmbeddedSolrServer(container, "core1");
        batchWriter = new BatchWriter(solr, batchSize, context.getTaskAttemptID().getTaskID(),
                SolrOutputFormat.getSolrWriterThreadCount(conf), SolrOutputFormat.getSolrWriterQueueSize(conf));

        // instantiate the converter
        String className = SolrDocumentConverter.getSolrDocumentConverter(conf);
        Class<? extends SolrDocumentConverter> cls = (Class<? extends SolrDocumentConverter>) Class
                .forName(className);
        converter = (SolrDocumentConverter<K, V>) ReflectionUtils.newInstance(cls, conf);
    } catch (Exception e) {
        throw new IllegalStateException(String.format("Failed to initialize record writer for %s, %s",
                context.getJobName(), conf.get("mapred.task.id")), e);
    } finally {
        heartBeater.cancelHeartBeat();
    }
}
From source file:org.kiji.avro.mapreduce.TestAvroKeyOutputFormat.java
License:Apache License
/**
 * Tests that the record writer is constructed and returned correctly from the output format.
 */
private void testGetRecordWriter(Configuration conf, CodecFactory expectedCodec) throws IOException {
    // Configure a mock task attempt context.
    Job job = new Job(conf);
    job.getConfiguration().set("mapred.output.dir", mTempDir.getRoot().getPath());
    Schema writerSchema = Schema.create(Schema.Type.INT);
    AvroJob.setOutputKeySchema(job, writerSchema);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(job.getConfiguration()).anyTimes();
    expect(context.getTaskAttemptID()).andReturn(new TaskAttemptID("id", 1, true, 1, 1)).anyTimes();

    // Create a mock record writer.
    @SuppressWarnings("unchecked")
    RecordWriter<AvroKey<Integer>, NullWritable> expectedRecordWriter = createMock(RecordWriter.class);
    AvroKeyOutputFormat.RecordWriterFactory recordWriterFactory = createMock(
            AvroKeyOutputFormat.RecordWriterFactory.class);

    // Expect the record writer factory to be called with appropriate parameters.
    Capture<CodecFactory> capturedCodecFactory = new Capture<CodecFactory>();
    expect(recordWriterFactory.create(eq(writerSchema),
            capture(capturedCodecFactory), // Capture for comparison later.
            anyObject(OutputStream.class))).andReturn(expectedRecordWriter);

    replay(context);
    replay(expectedRecordWriter);
    replay(recordWriterFactory);

    AvroKeyOutputFormat<Integer> outputFormat = new AvroKeyOutputFormat<Integer>(recordWriterFactory);
    RecordWriter<AvroKey<Integer>, NullWritable> recordWriter = outputFormat.getRecordWriter(context);

    // Make sure the expected codec was used.
    assertTrue(capturedCodecFactory.hasCaptured());
    assertEquals(expectedCodec.toString(), capturedCodecFactory.getValue().toString());

    verify(context);
    verify(expectedRecordWriter);
    verify(recordWriterFactory);

    assertNotNull(recordWriter);
    assertTrue(expectedRecordWriter == recordWriter);
}
From source file:org.kitesdk.data.mapreduce.DatasetKeyOutputFormat.java
License:Apache License
private static String getTaskAttemptDatasetName(TaskAttemptContext taskContext) {
    return taskContext.getTaskAttemptID().toString();
}
From source file:org.mrgeo.hadoop.multipleoutputs.DirectoryMultipleOutputs.java
License:Apache License
static TaskAttemptContext _getContext(final TaskAttemptContext context, final String nameOutput)
        throws IOException {
    TaskAttemptContext taskContext;

    // The following trick leverages the instantiation of a record writer via
    // the job thus supporting arbitrary output formats; it also bypasses
    // the lack of the set method we want on Job here.
    final Configuration clonedConfiguration = new Configuration(context.getConfiguration());
    String path = getHdfsPath(context, nameOutput);
    clonedConfiguration.set("mapred.output.dir", path);

    final Job job = new Job(clonedConfiguration);
    job.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput));
    job.setOutputKeyClass(getNamedOutputKeyClass(context, nameOutput));
    job.setOutputValueClass(getNamedOutputValueClass(context, nameOutput));

    taskContext = HadoopUtils.createTaskAttemptContext(job.getConfiguration(), context.getTaskAttemptID());

    return taskContext;
}