List of usage examples for org.apache.hadoop.mapreduce.OutputCommitter#commitTask
public abstract void commitTask(TaskAttemptContext taskContext) throws IOException;
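Every example below follows the same task-side commit protocol around this method: set up the task, write records, close the writer, call commitTask() only when needsTaskCommit() returns true, and abortTask() if writing fails. The following is a minimal sketch of that protocol, not code taken from any of the projects quoted below; the class name TaskCommitSketch, the method writeAndCommit, and its parameters are placeholders, and the caller is assumed to already hold a configured OutputFormat and TaskAttemptContext.

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/** Hypothetical helper class; not part of Hadoop or of the projects quoted below. */
public final class TaskCommitSketch {

    /**
     * Writes the given records with the format's RecordWriter and then runs the
     * task-side commit protocol. The caller is assumed to have configured the
     * OutputFormat and the TaskAttemptContext (e.g. from a Job and a TaskAttemptID).
     */
    public static <K, V> void writeAndCommit(OutputFormat<K, V> outputFormat,
            TaskAttemptContext taskContext,
            Iterable<Map.Entry<K, V>> records) throws IOException, InterruptedException {
        OutputCommitter committer = outputFormat.getOutputCommitter(taskContext);
        committer.setupTask(taskContext);                    // create the task-attempt scratch area
        RecordWriter<K, V> writer = outputFormat.getRecordWriter(taskContext);
        try {
            for (Map.Entry<K, V> record : records) {
                writer.write(record.getKey(), record.getValue());
            }
            writer.close(taskContext);
            if (committer.needsTaskCommit(taskContext)) {    // some committers have nothing to move
                committer.commitTask(taskContext);           // promote the attempt's output
            }
        } catch (IOException | InterruptedException e) {
            committer.abortTask(taskContext);                // discard the attempt's output on failure
            throw e;
        }
    }

    private TaskCommitSketch() {
    }
}

A committer whose needsTaskCommit() returns false (for example because the task produced no output) can skip commitTask() entirely, which is why most of the examples below guard the call.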
From source file:org.apache.giraph.worker.BspServiceWorkerForCohesiveSubgraph.java
License:Apache License
/**
 * Save the vertices using the user-defined VertexOutputFormat from our
 * vertexArray based on the split.
 *
 * @param numLocalVertices Number of local vertices
 * @throws InterruptedException
 */
private void saveVertices(long numLocalVertices) throws IOException, InterruptedException {
    if (getConfiguration().getVertexOutputFormatClass() == null) {
        LOG.warn("saveVertices: " + GiraphConstants.VERTEX_OUTPUT_FORMAT_CLASS
                + " not specified -- there will be no saved output");
        return;
    }
    if (getConfiguration().doOutputDuringComputation()) {
        if (LOG.isInfoEnabled()) {
            LOG.info("saveVertices: The option for doing output during "
                    + "computation is selected, so there will be no saving of the "
                    + "output in the end of application");
        }
        return;
    }

    int numThreads = Math.min(getConfiguration().getNumOutputThreads(),
            getPartitionStore().getNumPartitions());
    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
            "saveVertices: Starting to save " + numLocalVertices + " vertices "
                    + "using " + numThreads + " threads");
    final VertexOutputFormat<I, V, E> vertexOutputFormat =
            getConfiguration().createVertexOutputFormat();
    CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
        @Override
        public Callable<Void> newCallable(int callableId) {
            return new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    VertexWriter<I, V, E> vertexWriter =
                            vertexOutputFormat.createVertexWriter(getContext());
                    vertexWriter.setConf(
                            (ImmutableClassesGiraphConfiguration<I, V, E, Writable>) getConfiguration());
                    vertexWriter.initialize(getContext());
                    long verticesWritten = 0;
                    long nextPrintVertices = 0;
                    long nextPrintMsecs = System.currentTimeMillis() + 15000;
                    int partitionIndex = 0;
                    int numPartitions = getPartitionStore().getNumPartitions();
                    for (Integer partitionId : getPartitionStore().getPartitionIds()) {
                        Partition<I, V, E, M> partition =
                                getPartitionStore().getPartition(partitionId);
                        if (getConfiguration().getBoolean("giraph.ktruss.subgraph", false)) {
                            /** Special for the ktruss output */
                            BasicGraphStoreInterface gs = (BasicGraphStoreInterface) (partition);
                            for (BasicVertex rv : gs.getLocalVertex()) {
                                Vertex<I, V, E, M> vertex = getConfiguration().createVertex();
                                List<Edge<I, E>> edges = Lists.newLinkedList();
                                for (BasicEdge nb : rv.getNeighbors()) {
                                    edges.add(EdgeFactory.create(((I) new IntWritable(nb.getTargetId())),
                                            ((E) NullWritable.get())));
                                }
                                vertex.initialize((I) (new IntWritable(rv.getId())),
                                        ((V) new IntWritable(0)), edges);
                                vertexWriter.writeVertex(vertex);
                                ++verticesWritten;

                                // Update status at most every 250k vertices or 15 seconds
                                if (verticesWritten > nextPrintVertices
                                        && System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                            "saveVertices: Saved " + verticesWritten + " out of "
                                                    + partition.getVertexCount() + " partition vertices, "
                                                    + "on partition " + partitionIndex
                                                    + " out of " + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = verticesWritten + 250000;
                                }
                            }
                        } else {
                            for (Vertex<I, V, E, M> vertex : partition) {
                                vertexWriter.writeVertex(vertex);
                                ++verticesWritten;

                                // Update status at most every 250k vertices or 15 seconds
                                if (verticesWritten > nextPrintVertices
                                        && System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                            "saveVertices: Saved " + verticesWritten + " out of "
                                                    + partition.getVertexCount() + " partition vertices, "
                                                    + "on partition " + partitionIndex
                                                    + " out of " + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = verticesWritten + 250000;
                                }
                            }
                        }
                        ++partitionIndex;
                    }
                    vertexWriter.close(getContext()); // the temp results are saved now
                    return null;
                }
            };
        }
    };
    ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads,
            "save-vertices-%d", getContext());

    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
            "saveVertices: Done saving vertices.");
    // YARN: must complete the commit of the "task" output, Hadoop isn't there.
    if (getConfiguration().isPureYarnJob()
            && getConfiguration().getVertexOutputFormatClass() != null) {
        try {
            OutputCommitter outputCommitter =
                    vertexOutputFormat.getOutputCommitter(getContext());
            if (outputCommitter.needsTaskCommit(getContext())) {
                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                        "OutputCommitter: committing task output.");
                // transfer from temp dirs to "task commit" dirs to prep for
                // the master's OutputCommitter#commitJob(context) call to finish.
                outputCommitter.commitTask(getContext());
            }
        } catch (InterruptedException ie) {
            LOG.error("Interrupted while attempting to obtain OutputCommitter.", ie);
        } catch (IOException ioe) {
            LOG.error("Master task's attempt to commit output has FAILED.", ioe);
        }
    }
}
From source file:org.apache.hcatalog.data.transfer.impl.HCatOutputFormatWriter.java
License:Apache License
@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {
    int id = sp.getId();
    setVarsInConf(id);
    HCatOutputFormat outFormat = new HCatOutputFormat();
    TaskAttemptContext cntxt = HCatHadoopShims.Instance.get().createTaskAttemptContext(conf,
            new TaskAttemptID(HCatHadoopShims.Instance.get().createTaskID(), id));
    OutputCommitter committer = null;
    RecordWriter<WritableComparable<?>, HCatRecord> writer;
    try {
        committer = outFormat.getOutputCommitter(cntxt);
        committer.setupTask(cntxt);
        writer = outFormat.getRecordWriter(cntxt);
        while (recordItr.hasNext()) {
            HCatRecord rec = recordItr.next();
            writer.write(null, rec);
        }
        writer.close(cntxt);
        if (committer.needsTaskCommit(cntxt)) {
            committer.commitTask(cntxt);
        }
    } catch (IOException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    } catch (InterruptedException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    }
}
From source file:org.apache.hcatalog.mapreduce.FileRecordWriterContainer.java
License:Apache License
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    Reporter reporter = InternalUtil.createReporter(context);
    if (dynamicPartitioningUsed) {
        for (org.apache.hadoop.mapred.RecordWriter<? super WritableComparable<?>, ? super Writable> bwriter
                : baseDynamicWriters.values()) {
            // We are in RecordWriter.close(), so it makes sense that the context would be TaskInputOutput
            bwriter.close(reporter);
        }
        for (Map.Entry<String, org.apache.hadoop.mapred.OutputCommitter> entry
                : baseDynamicCommitters.entrySet()) {
            org.apache.hadoop.mapred.TaskAttemptContext currContext = dynamicContexts.get(entry.getKey());
            OutputCommitter baseOutputCommitter = entry.getValue();
            if (baseOutputCommitter.needsTaskCommit(currContext)) {
                baseOutputCommitter.commitTask(currContext);
            }
        }
    } else {
        getBaseRecordWriter().close(reporter);
    }
}
From source file:org.apache.hcatalog.pig.TestE2EScenarios.java
License:Apache License
private void copyTable(String in, String out) throws IOException, InterruptedException {
    Job ijob = new Job();
    Job ojob = new Job();
    HCatInputFormat inpy = new HCatInputFormat();
    inpy.setInput(ijob, null, in);
    HCatOutputFormat oupy = new HCatOutputFormat();
    oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap<String, String>()));

    // Test HCatContext
    System.err.println("HCatContext INSTANCE is present : " + HCatContext.INSTANCE.getConf().isPresent());
    if (HCatContext.INSTANCE.getConf().isPresent()) {
        System.err.println("HCatContext tinyint->int promotion says " + HCatContext.INSTANCE.getConf().get()
                .getBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
                        HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
    }

    HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
    System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + tableSchema.toString());
    oupy.setSchema(ojob, tableSchema);
    oupy.checkOutputSpecs(ojob);
    OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
    oc.setupJob(ojob);

    for (InputSplit split : inpy.getSplits(ijob)) {
        TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
        TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());

        RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
        rr.initialize(split, rtaskContext);

        OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
        taskOc.setupTask(wtaskContext);
        RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);

        while (rr.nextKeyValue()) {
            rw.write(rr.getCurrentKey(), rr.getCurrentValue());
        }
        rw.close(wtaskContext);
        taskOc.commitTask(wtaskContext);
        rr.close();
    }

    oc.commitJob(ojob);
}
From source file:org.apache.hive.hcatalog.data.transfer.impl.HCatOutputFormatWriter.java
License:Apache License
@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {
    int id = sp.getId();
    setVarsInConf(id);
    HCatOutputFormat outFormat = new HCatOutputFormat();
    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf,
            new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
    OutputCommitter committer = null;
    RecordWriter<WritableComparable<?>, HCatRecord> writer;
    try {
        committer = outFormat.getOutputCommitter(cntxt);
        committer.setupTask(cntxt);
        writer = outFormat.getRecordWriter(cntxt);
        while (recordItr.hasNext()) {
            HCatRecord rec = recordItr.next();
            writer.write(null, rec);
        }
        writer.close(cntxt);
        if (committer.needsTaskCommit(cntxt)) {
            committer.commitTask(cntxt);
        }
    } catch (IOException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    } catch (InterruptedException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    }
}
From source file:org.apache.hive.hcatalog.mapreduce.DynamicPartitionFileRecordWriterContainer.java
License:Apache License
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    Reporter reporter = InternalUtil.createReporter(context);
    for (RecordWriter<? super WritableComparable<?>, ? super Writable> bwriter : baseDynamicWriters.values()) {
        // We are in RecordWriter.close(), so it makes sense that the context would be
        // TaskInputOutput.
        bwriter.close(reporter);
    }

    TaskCommitContextRegistry.getInstance().register(context,
            new TaskCommitContextRegistry.TaskCommitterProxy() {
                @Override
                public void abortTask(TaskAttemptContext context) throws IOException {
                    for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo.entrySet()) {
                        String dynKey = outputJobInfoEntry.getKey();
                        OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
                        LOG.info("Aborting task-attempt for " + outputJobInfo.getLocation());
                        baseDynamicCommitters.get(dynKey).abortTask(dynamicContexts.get(dynKey));
                    }
                }

                @Override
                public void commitTask(TaskAttemptContext context) throws IOException {
                    for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo.entrySet()) {
                        String dynKey = outputJobInfoEntry.getKey();
                        OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
                        LOG.info("Committing task-attempt for " + outputJobInfo.getLocation());
                        TaskAttemptContext dynContext = dynamicContexts.get(dynKey);
                        OutputCommitter dynCommitter = baseDynamicCommitters.get(dynKey);
                        if (dynCommitter.needsTaskCommit(dynContext)) {
                            dynCommitter.commitTask(dynContext);
                        } else {
                            LOG.info("Skipping commitTask() for " + outputJobInfo.getLocation());
                        }
                    }
                }
            });
}
From source file:org.apache.hive.hcatalog.mapreduce.TestHCatOutputFormat.java
License:Apache License
public void publishTest(Job job) throws Exception {
    HCatOutputFormat hcof = new HCatOutputFormat();
    TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
            job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID());
    OutputCommitter committer = hcof.getOutputCommitter(tac);
    committer.setupJob(job);
    committer.setupTask(tac);
    committer.commitTask(tac);
    committer.commitJob(job);

    Partition part = client.getPartition(dbName, tblName, Arrays.asList("p1"));
    assertNotNull(part);

    StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters());
    assertEquals(storer.getProperties().get("hcat.testarg"), "testArgValue");
    assertTrue(part.getSd().getLocation().indexOf("p1") != -1);
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Task.java
License:Apache License
/**
 * Commit task.
 *
 * @param outputFormat Output format.
 * @throws IgniteCheckedException In case of Grid exception.
 * @throws IOException In case of IO exception.
 * @throws InterruptedException In case of interrupt.
 */
protected void commit(@Nullable OutputFormat outputFormat)
        throws IgniteCheckedException, IOException, InterruptedException {
    if (hadoopCtx.writer() != null) {
        assert outputFormat != null;

        OutputCommitter outputCommitter = outputFormat.getOutputCommitter(hadoopCtx);

        if (outputCommitter.needsTaskCommit(hadoopCtx))
            outputCommitter.commitTask(hadoopCtx);
    }
}
From source file:org.apache.parquet.pig.PerfTest2.java
License:Apache License
public static void write(String out) throws IOException, ParserException, InterruptedException, ExecException {
    {
        StringBuilder schemaString = new StringBuilder("a0: chararray");
        for (int i = 1; i < COLUMN_COUNT; i++) {
            schemaString.append(", a" + i + ": chararray");
        }

        String location = out;
        String schema = schemaString.toString();

        StoreFuncInterface storer = new ParquetStorer();
        Job job = new Job(conf);
        storer.setStoreFuncUDFContextSignature("sig");
        String absPath = storer.relToAbsPathForStoreLocation(location,
                new Path(new File(".").getAbsoluteFile().toURI()));
        storer.setStoreLocation(absPath, job);
        storer.checkSchema(new ResourceSchema(Utils.getSchemaFromString(schema)));
        @SuppressWarnings("unchecked") // that's how the base class is defined
        OutputFormat<Void, Tuple> outputFormat = storer.getOutputFormat();
        // it's ContextUtil.getConfiguration(job) and not just conf !
        JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job),
                new JobID("jt", jobid++));
        outputFormat.checkOutputSpecs(jobContext);
        if (schema != null) {
            ResourceSchema resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
            storer.checkSchema(resourceSchema);
            if (storer instanceof StoreMetadata) {
                ((StoreMetadata) storer).storeSchema(resourceSchema, absPath, job);
            }
        }
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID("jt", jobid, true, 1, 0));
        RecordWriter<Void, Tuple> recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
        storer.prepareToWrite(recordWriter);

        for (int i = 0; i < ROW_COUNT; i++) {
            Tuple tuple = TupleFactory.getInstance().newTuple(COLUMN_COUNT);
            for (int j = 0; j < COLUMN_COUNT; j++) {
                tuple.set(j, "a" + i + "_" + j);
            }
            storer.putNext(tuple);
        }

        recordWriter.close(taskAttemptContext);
        OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskAttemptContext);
        outputCommitter.commitTask(taskAttemptContext);
        outputCommitter.commitJob(jobContext);
    }
}
From source file:org.apache.pig.impl.io.PigFile.java
License:Apache License
public void store(DataBag data, FuncSpec storeFuncSpec, PigContext pigContext) throws IOException {
    Configuration conf = ConfigurationUtil.toConfiguration(pigContext.getProperties());
    // create a simulated JobContext
    JobContext jc = HadoopShims.createJobContext(conf, new JobID());
    StoreFuncInterface sfunc = (StoreFuncInterface) PigContext.instantiateFuncFromSpec(storeFuncSpec);
    OutputFormat<?, ?> of = sfunc.getOutputFormat();

    POStore store = new POStore(new OperatorKey());
    store.setSFile(new FileSpec(file, storeFuncSpec));
    PigOutputFormat.setLocation(jc, store);
    OutputCommitter oc;
    // create a simulated TaskAttemptContext
    TaskAttemptContext tac = HadoopShims.createTaskAttemptContext(conf, HadoopShims.getNewTaskAttemptID());
    PigOutputFormat.setLocation(tac, store);
    RecordWriter<?, ?> rw;
    try {
        of.checkOutputSpecs(jc);
        oc = of.getOutputCommitter(tac);
        oc.setupJob(jc);
        oc.setupTask(tac);
        rw = of.getRecordWriter(tac);
        sfunc.prepareToWrite(rw);

        for (Iterator<Tuple> it = data.iterator(); it.hasNext();) {
            Tuple row = it.next();
            sfunc.putNext(row);
        }
        rw.close(tac);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
    if (oc.needsTaskCommit(tac)) {
        oc.commitTask(tac);
    }
    HadoopShims.commitOrCleanup(oc, jc);
}