List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID
public TaskAttemptID getTaskAttemptID();
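Before the collected examples, here is a minimal sketch of the pattern most of them share: the TaskAttemptContext passed to a RecordWriter, RecordReader, or OutputFormat is asked for its TaskAttemptID, which is then drilled into via getTaskID()/getId() to build task-unique names. This sketch is not taken from any of the projects below; the AttemptNamedOutputFormat class and its file-naming scheme are illustrative assumptions.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Hypothetical OutputFormat that names its work file after the task attempt.
public class AttemptNamedOutputFormat extends TextOutputFormat<NullWritable, Text> {

    @Override
    public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
        // Full attempt id, e.g. attempt_..._m_000003_0
        TaskAttemptID attemptId = context.getTaskAttemptID();
        // Numeric index of the task within the job, useful as a partition number
        int partition = attemptId.getTaskID().getId();
        String name = String.format("part-%05d-%s%s", partition, attemptId, extension);
        // Reuse the parent work directory chosen by FileOutputFormat, only the file name changes
        return new Path(super.getDefaultWorkFile(context, extension).getParent(), name);
    }
}

The real-world examples below use the same call in three roles: deriving a task/partition id, labelling temporary or per-task output paths, and passing the TaskID to downstream writers.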
From source file:org.apache.solr.hadoop.SolrRecordWriter.java
License:Apache License
public SolrRecordWriter(TaskAttemptContext context, Path outputShardDir, int batchSize) {
    this.batchSize = batchSize;
    this.batch = new ArrayList(batchSize);
    Configuration conf = context.getConfiguration();

    // setLogLevel("org.apache.solr.core", "WARN");
    // setLogLevel("org.apache.solr.update", "WARN");

    heartBeater = new HeartBeater(context);
    try {
        heartBeater.needHeartBeat();

        Path solrHomeDir = SolrRecordWriter.findSolrConfig(conf);
        FileSystem fs = outputShardDir.getFileSystem(conf);
        EmbeddedSolrServer solr = createEmbeddedSolrServer(solrHomeDir, fs, outputShardDir);
        batchWriter = new BatchWriter(solr, batchSize, context.getTaskAttemptID().getTaskID(),
                SolrOutputFormat.getSolrWriterThreadCount(conf), SolrOutputFormat.getSolrWriterQueueSize(conf));
    } catch (Exception e) {
        throw new IllegalStateException(String.format("Failed to initialize record writer for %s, %s",
                context.getJobName(), conf.get("mapred.task.id")), e);
    } finally {
        heartBeater.cancelHeartBeat();
    }
}
From source file:org.apache.sqoop.manager.oracle.OraOopOutputFormatBase.java
License:Apache License
protected int getMapperId(TaskAttemptContext context) {
    return context.getTaskAttemptID().getTaskID().getId();
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java
License:Apache License
private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration,
        final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass,
        final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass)
        throws Exception {

    final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration);
    final TaskAttemptContext job = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0));

    int vertexCount = 0;
    int outEdgeCount = 0;
    int inEdgeCount = 0;

    final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent()
            ? ReflectionUtils.newInstance(outFormatClass.get(), configuration)
            : null;
    final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null
            : outputFormat.getRecordWriter(job);

    boolean foundKeyValue = false;
    for (final FileSplit split : fileSplits) {
        logger.info("\treading file split {}", split.getPath().getName() + " ({}",
                split.getStart() + "..." + (split.getStart() + split.getLength()), "{} {} bytes)");
        final RecordReader reader = inputFormat.createRecordReader(split, job);
        float lastProgress = -1f;
        while (reader.nextKeyValue()) {
            //System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue());
            final float progress = reader.getProgress();
            assertTrue(progress >= lastProgress);
            assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
            final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue();
            if (null != writer)
                writer.write(NullWritable.get(), vertexWritable);
            vertexCount++;
            outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT));
            inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN));
            //
            final Vertex vertex = vertexWritable.get();
            assertEquals(Integer.class, vertex.id().getClass());
            if (vertex.value("name").equals("SUGAR MAGNOLIA")) {
                foundKeyValue = true;
                assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT)));
                assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN)));
            }
            lastProgress = progress;
        }
    }

    assertEquals(8049, outEdgeCount);
    assertEquals(8049, inEdgeCount);
    assertEquals(outEdgeCount, inEdgeCount);
    assertEquals(808, vertexCount);
    assertTrue(foundKeyValue);

    if (null != writer) {
        writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID()));
        for (int i = 1; i < 10; i++) {
            final File outputDirectory = new File(
                    new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI());
            final List<FileSplit> splits = generateFileSplits(
                    new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/"
                            + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt") + "_0"
                            + "/part-m-00000"),
                    i);
            validateFileSplits(splits, configuration, inputFormatClass, Optional.empty());
        }
    }
}
From source file:org.broadinstitute.sting.gatk.hadoop.LociRecordReader.java
License:Open Source License
public void initialize(InputSplit spl, TaskAttemptContext ctx) throws IOException {
    final FileVirtualSplit split = (FileVirtualSplit) spl;

    file = split.getPath();
    fs = file.getFileSystem(ctx.getConfiguration());

    final FSDataInputStream in = fs.open(file);
    codec = new BAMRecordCodec(new SAMFileReader(in).getFileHeader());
    in.seek(0);
    bci = new BlockCompressedInputStream(
            new WrapSeekable<FSDataInputStream>(in, fs.getFileStatus(file).getLen(), file));

    virtualStart = split.getStartVirtualOffset();
    fileStart = virtualStart >>> 16;
    virtualEnd = split.getEndVirtualOffset();
    fileEnd = virtualEnd >>> 16;

    idx = new SplittingBAMIndex(file.getFileSystem(ctx.getConfiguration()).open(getIdxPath(file)));

    codec.setInputStream(bci);
    bci.seek(virtualStart);

    JobConf job = new JobConf(ctx.getConfiguration());
    jobDir = new String(job.getJobLocalDir());
    attemptID = ctx.getTaskAttemptID().toString();
}
From source file:org.broadinstitute.sting.gatk.hadoop.SortOutputFormat.java
License:Open Source License
@Override
public RecordWriter<NullWritable, SAMRecordWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    int pos = 0;

    if (context == null) {
        throw new IOException("context is NULL");
    }

    Path[] p = FileInputFormat.getInputPaths(context);
    assert (p.length > 0);
    FileSystem fs = p[0].getFileSystem(context.getConfiguration());

    FileStatus[] status = fs.listStatus(p[0]);
    for (int i = 0; i < status.length; i++) {
        if ((status[i].getPath().getName()).endsWith(".bam")) {
            pos = i;
            break;
        }
    }

    if (super.header == null) {
        Configuration c = context.getConfiguration();
        readSAMHeaderFrom(status[pos].getPath(), fs);
        if ((context.getConfiguration().getBoolean("gatk.hadoop.ismerge", false)) == false) {
            setWriteHeader(true);
        } else if (context.getTaskAttemptID().getTaskID().getId() == 0) {
            setWriteHeader(true);
        }
    }
    return super.getRecordWriter(context);
}
From source file:org.broadinstitute.sting.gatk.hadoop.SortOutputFormat.java
License:Open Source License
@Override
public Path getDefaultWorkFile(TaskAttemptContext context, String ext) throws IOException {
    String filename = context.getTaskAttemptID().toString();
    String extension = ext.isEmpty() ? ext : "." + ext;
    int part = context.getTaskAttemptID().getTaskID().getId();
    return new Path(super.getDefaultWorkFile(context, ext).getParent(),
            String.format("%06d", part) + "-" + filename + extension);
}
From source file:org.gbif.ocurrence.index.solr.SolrRecordWriter.java
License:Apache License
@SuppressWarnings("unchecked")
public SolrRecordWriter(TaskAttemptContext context) {
    conf = context.getConfiguration();
    batchSize = SolrOutputFormat.getBatchSize(conf);
    setLogLevel("org.apache.solr.core", "WARN");
    setLogLevel("org.apache.solr.update", "WARN");

    heartBeater = new HeartBeater(context);
    try {
        heartBeater.needHeartBeat();

        /** The actual file in hdfs that holds the configuration. */
        final String configuredSolrConfigPath = conf.get(SolrOutputFormat.SETUP_OK);
        if (configuredSolrConfigPath == null) {
            throw new IllegalStateException(
                    String.format("The job did not pass %s", SolrOutputFormat.SETUP_OK));
        }
        outputZipFile = SolrOutputFormat.isOutputZipFormat(conf);

        this.fs = FileSystem.get(conf);
        perm = new Path(FileOutputFormat.getOutputPath(context), getOutFileName(context, "part"));

        // Make a task unique name that contains the actual index output name to
        // make debugging simpler
        // Note: if using JVM reuse, the sequence number will not be reset for a
        // new task using the jvm
        temp = conf.getLocalPath("mapred.local.dir",
                "solr_" + conf.get("mapred.task.id") + '.' + sequence.incrementAndGet());

        if (outputZipFile && !perm.getName().endsWith(".zip")) {
            perm = perm.suffix(".zip");
        }
        fs.delete(perm, true); // delete old, if any
        Path local = fs.startLocalOutput(perm, temp);

        solrHome = findSolrConfig(conf);
        // }
        // Verify that the solr home has a conf and lib directory
        if (solrHome == null) {
            throw new IOException("Unable to find solr home setting");
        }

        // Setup a solr instance that we can batch writes to
        LOG.info("SolrHome: " + solrHome.toUri());
        String dataDir = new File(local.toString(), "data").toString();

        // copy the schema to the conf dir
        File confDir = new File(local.toString(), "conf");
        confDir.mkdirs();
        File srcSchemaFile = new File(solrHome.toString(), "conf/schema.xml");
        assert srcSchemaFile.exists();
        FileUtils.copyFile(srcSchemaFile, new File(confDir, "schema.xml"));

        Properties props = new Properties();
        props.setProperty("solr.data.dir", dataDir);
        props.setProperty("solr.home", solrHome.toString());
        SolrResourceLoader loader = new SolrResourceLoader(solrHome.toString(), null, props);
        LOG.info(String.format(
                "Constructed instance information solr.home %s (%s), instance dir %s, conf dir %s, writing index to temporary directory %s, with permdir %s",
                solrHome, solrHome.toUri(), loader.getInstanceDir(), loader.getConfigDir(), dataDir, perm));

        CoreContainer container = new CoreContainer(loader);
        CoreDescriptor descr = new CoreDescriptor(container, "core1", solrHome.toString());
        descr.setDataDir(dataDir);
        descr.setCoreProperties(props);
        core = container.create(descr);
        container.register(core, false);
        solr = new EmbeddedSolrServer(container, "core1");
        batchWriter = new BatchWriter(solr, batchSize, context.getTaskAttemptID().getTaskID(),
                SolrOutputFormat.getSolrWriterThreadCount(conf), SolrOutputFormat.getSolrWriterQueueSize(conf));

        // instantiate the converter
        String className = SolrDocumentConverter.getSolrDocumentConverter(conf);
        Class<? extends SolrDocumentConverter> cls = (Class<? extends SolrDocumentConverter>) Class
                .forName(className);
        converter = (SolrDocumentConverter<K, V>) ReflectionUtils.newInstance(cls, conf);
    } catch (Exception e) {
        throw new IllegalStateException(String.format("Failed to initialize record writer for %s, %s",
                context.getJobName(), conf.get("mapred.task.id")), e);
    } finally {
        heartBeater.cancelHeartBeat();
    }
}
From source file:org.kiji.avro.mapreduce.TestAvroKeyOutputFormat.java
License:Apache License
/**
 * Tests that the record writer is constructed and returned correctly from the output format.
 */
private void testGetRecordWriter(Configuration conf, CodecFactory expectedCodec) throws IOException {
    // Configure a mock task attempt context.
    Job job = new Job(conf);
    job.getConfiguration().set("mapred.output.dir", mTempDir.getRoot().getPath());
    Schema writerSchema = Schema.create(Schema.Type.INT);
    AvroJob.setOutputKeySchema(job, writerSchema);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(job.getConfiguration()).anyTimes();
    expect(context.getTaskAttemptID()).andReturn(new TaskAttemptID("id", 1, true, 1, 1)).anyTimes();

    // Create a mock record writer.
    @SuppressWarnings("unchecked")
    RecordWriter<AvroKey<Integer>, NullWritable> expectedRecordWriter = createMock(RecordWriter.class);
    AvroKeyOutputFormat.RecordWriterFactory recordWriterFactory = createMock(
            AvroKeyOutputFormat.RecordWriterFactory.class);

    // Expect the record writer factory to be called with appropriate parameters.
    Capture<CodecFactory> capturedCodecFactory = new Capture<CodecFactory>();
    expect(recordWriterFactory.create(eq(writerSchema),
            capture(capturedCodecFactory), // Capture for comparison later.
            anyObject(OutputStream.class))).andReturn(expectedRecordWriter);

    replay(context);
    replay(expectedRecordWriter);
    replay(recordWriterFactory);

    AvroKeyOutputFormat<Integer> outputFormat = new AvroKeyOutputFormat<Integer>(recordWriterFactory);
    RecordWriter<AvroKey<Integer>, NullWritable> recordWriter = outputFormat.getRecordWriter(context);

    // Make sure the expected codec was used.
    assertTrue(capturedCodecFactory.hasCaptured());
    assertEquals(expectedCodec.toString(), capturedCodecFactory.getValue().toString());

    verify(context);
    verify(expectedRecordWriter);
    verify(recordWriterFactory);

    assertNotNull(recordWriter);
    assertTrue(expectedRecordWriter == recordWriter);
}
From source file:org.kitesdk.data.mapreduce.DatasetKeyOutputFormat.java
License:Apache License
private static String getTaskAttemptDatasetName(TaskAttemptContext taskContext) {
    return taskContext.getTaskAttemptID().toString();
}
From source file:org.mrgeo.hadoop.multipleoutputs.DirectoryMultipleOutputs.java
License:Apache License
static TaskAttemptContext _getContext(final TaskAttemptContext context, final String nameOutput)
        throws IOException {
    TaskAttemptContext taskContext;

    // The following trick leverages the instantiation of a record writer via
    // the job thus supporting arbitrary output formats; it also bypasses
    // the lack of the set method we want on Job here.
    final Configuration clonedConfiguration = new Configuration(context.getConfiguration());
    String path = getHdfsPath(context, nameOutput);
    clonedConfiguration.set("mapred.output.dir", path);

    final Job job = new Job(clonedConfiguration);
    job.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput));
    job.setOutputKeyClass(getNamedOutputKeyClass(context, nameOutput));
    job.setOutputValueClass(getNamedOutputValueClass(context, nameOutput));

    taskContext = HadoopUtils.createTaskAttemptContext(job.getConfiguration(), context.getTaskAttemptID());

    return taskContext;
}