List of usage examples for org.apache.hadoop.mapreduce.OutputCommitter#needsTaskCommit
public abstract boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException;
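Before the per-file examples, here is a minimal sketch of the task-side commit protocol this method participates in: set up the task, write the records, then call commitTask only if the committer reports output to promote, and abort on failure. The helper name writeAndCommit and the Map-based record source are illustrative assumptions; the OutputFormat/OutputCommitter/RecordWriter calls are the standard Hadoop mapreduce API.

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public final class TaskCommitSketch {
    /** Writes all records for one task attempt and runs the commit protocol around it. */
    static <K, V> void writeAndCommit(OutputFormat<K, V> outputFormat, TaskAttemptContext context,
            Map<K, V> records) throws IOException, InterruptedException {
        OutputCommitter committer = outputFormat.getOutputCommitter(context);
        committer.setupTask(context);
        try {
            RecordWriter<K, V> writer = outputFormat.getRecordWriter(context);
            for (Map.Entry<K, V> record : records.entrySet()) {
                writer.write(record.getKey(), record.getValue());
            }
            writer.close(context);
            // Commit only when this attempt actually produced output to promote.
            if (committer.needsTaskCommit(context)) {
                committer.commitTask(context);
            }
        } catch (IOException | InterruptedException e) {
            // On any failure, discard this attempt's output so another attempt can retry.
            committer.abortTask(context);
            throw e;
        }
    }
}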
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
private void doCommitTask(TaskAttemptContext context, OutputCommitter committer) throws IOException {
    if (committer.needsTaskCommit(context)) {
        committer.commitTask(context);
    }
}
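The guard matters because commitTask can be skipped entirely for attempts that wrote nothing; the default FileOutputCommitter, for example, reports true only when the attempt actually created a temporary output directory.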
From source file:org.apache.flink.api.java.hadoop.mapreduce.HadoopOutputFormatTest.java
License:Apache License
private OutputCommitter setupOutputCommitter(boolean needsTaskCommit) throws IOException {
    OutputCommitter outputCommitter = Mockito.mock(OutputCommitter.class);
    when(outputCommitter.needsTaskCommit(any(TaskAttemptContext.class))).thenReturn(needsTaskCommit);
    doNothing().when(outputCommitter).commitTask(any(TaskAttemptContext.class));
    return outputCommitter;
}
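A hedged sketch of how such a stub might be exercised; the fragment below is an assumption for illustration, not taken from the Flink test, and would sit inside a test method that declares throws IOException:

OutputCommitter committer = setupOutputCommitter(true);
TaskAttemptContext context = Mockito.mock(TaskAttemptContext.class);
if (committer.needsTaskCommit(context)) { // returns the stubbed value: true
    committer.commitTask(context);        // stubbed to do nothing
}
Mockito.verify(committer).commitTask(context);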
From source file:org.apache.giraph.io.internal.WrappedEdgeOutputFormat.java
License:Apache License
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
    final OutputCommitter outputCommitter = originalOutputFormat
            .getOutputCommitter(HadoopUtils.makeTaskAttemptContext(getConf(), context));
    return new OutputCommitter() {
        @Override
        public void setupJob(JobContext context) throws IOException {
            outputCommitter.setupJob(HadoopUtils.makeJobContext(getConf(), context));
        }

        @Override
        public void setupTask(TaskAttemptContext context) throws IOException {
            outputCommitter.setupTask(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
            return outputCommitter.needsTaskCommit(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public void commitTask(TaskAttemptContext context) throws IOException {
            outputCommitter.commitTask(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public void abortTask(TaskAttemptContext context) throws IOException {
            outputCommitter.abortTask(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public void cleanupJob(JobContext context) throws IOException {
            outputCommitter.cleanupJob(HadoopUtils.makeJobContext(getConf(), context));
        }

        /*if_not[HADOOP_NON_COMMIT_JOB]*/
        @Override
        public void commitJob(JobContext context) throws IOException {
            outputCommitter.commitJob(HadoopUtils.makeJobContext(getConf(), context));
        }

        @Override
        public void abortJob(JobContext context, JobStatus.State state) throws IOException {
            outputCommitter.abortJob(HadoopUtils.makeJobContext(getConf(), context), state);
        }
        /*end[HADOOP_NON_COMMIT_JOB]*/
    };
}
From source file:org.apache.giraph.io.internal.WrappedVertexOutputFormat.java
License:Apache License
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
    final OutputCommitter outputCommitter = originalOutputFormat
            .getOutputCommitter(HadoopUtils.makeTaskAttemptContext(getConf(), context));
    return new OutputCommitter() {
        @Override
        public void setupJob(JobContext context) throws IOException {
            outputCommitter.setupJob(HadoopUtils.makeJobContext(getConf(), context));
        }

        @Override
        public void setupTask(TaskAttemptContext context) throws IOException {
            outputCommitter.setupTask(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
            return outputCommitter.needsTaskCommit(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public void commitTask(TaskAttemptContext context) throws IOException {
            outputCommitter.commitTask(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public void abortTask(TaskAttemptContext context) throws IOException {
            outputCommitter.abortTask(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public void cleanupJob(JobContext context) throws IOException {
            outputCommitter.cleanupJob(HadoopUtils.makeJobContext(getConf(), context));
        }

        /*if_not[HADOOP_NON_COMMIT_JOB]*/
        @Override
        public void commitJob(JobContext context) throws IOException {
            outputCommitter.commitJob(HadoopUtils.makeJobContext(getConf(), context));
        }

        @Override
        public void abortJob(JobContext context, JobStatus.State state) throws IOException {
            outputCommitter.abortJob(HadoopUtils.makeJobContext(getConf(), context), state);
        }
        /*end[HADOOP_NON_COMMIT_JOB]*/
    };
}
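Both Giraph wrappers above follow the same delegation pattern: every callback, including needsTaskCommit, re-creates the context via HadoopUtils.makeTaskAttemptContext or makeJobContext so the wrapped committer sees the Giraph-side configuration rather than the caller's.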
From source file:org.apache.giraph.worker.BspServiceSource.java
License:Apache License
/**
 * Save the vertices using the user-defined VertexOutputFormat from our
 * vertexArray based on the split.
 *
 * @param numLocalVertices Number of local vertices
 * @throws InterruptedException
 */
private void saveVertices(long numLocalVertices) throws IOException, InterruptedException {
    ImmutableClassesGiraphConfiguration<I, V, E> conf = getConfiguration();

    if (conf.getVertexOutputFormatClass() == null) {
        LOG.warn("saveVertices: " + GiraphConstants.VERTEX_OUTPUT_FORMAT_CLASS
                + " not specified -- there will be no saved output");
        return;
    }
    if (conf.doOutputDuringComputation()) {
        if (LOG.isInfoEnabled()) {
            LOG.info("saveVertices: The option for doing output during "
                    + "computation is selected, so there will be no saving of the "
                    + "output in the end of application");
        }
        return;
    }

    final int numPartitions = getPartitionStore().getNumPartitions();
    int numThreads = Math.min(getConfiguration().getNumOutputThreads(), numPartitions);
    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
            "saveVertices: Starting to save " + numLocalVertices + " vertices "
                    + "using " + numThreads + " threads");
    final VertexOutputFormat<I, V, E> vertexOutputFormat =
            getConfiguration().createWrappedVertexOutputFormat();

    final Queue<Integer> partitionIdQueue = (numPartitions == 0)
            ? new LinkedList<Integer>()
            : new ArrayBlockingQueue<Integer>(numPartitions);
    Iterables.addAll(partitionIdQueue, getPartitionStore().getPartitionIds());

    long verticesToStore = 0;
    PartitionStore<I, V, E> partitionStore = getPartitionStore();
    for (int partitionId : partitionStore.getPartitionIds()) {
        Partition<I, V, E> partition = partitionStore.getOrCreatePartition(partitionId);
        verticesToStore += partition.getVertexCount();
        partitionStore.putPartition(partition);
    }
    WorkerProgress.get().startStoring(verticesToStore, getPartitionStore().getNumPartitions());

    CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
        @Override
        public Callable<Void> newCallable(int callableId) {
            return new Callable<Void>() {
                /** How often to update WorkerProgress */
                private static final long VERTICES_TO_UPDATE_PROGRESS = 100000;

                @Override
                public Void call() throws Exception {
                    VertexWriter<I, V, E> vertexWriter = vertexOutputFormat.createVertexWriter(getContext());
                    vertexWriter.setConf(getConfiguration());
                    vertexWriter.initialize(getContext());
                    long nextPrintVertices = 0;
                    long nextUpdateProgressVertices = VERTICES_TO_UPDATE_PROGRESS;
                    long nextPrintMsecs = System.currentTimeMillis() + 15000;
                    int partitionIndex = 0;
                    int numPartitions = getPartitionStore().getNumPartitions();
                    while (!partitionIdQueue.isEmpty()) {
                        Integer partitionId = partitionIdQueue.poll();
                        if (partitionId == null) {
                            break;
                        }

                        Partition<I, V, E> partition = getPartitionStore().getOrCreatePartition(partitionId);
                        long verticesWritten = 0;
                        for (Vertex<I, V, E> vertex : partition) {
                            vertexWriter.writeVertex(vertex);
                            ++verticesWritten;

                            // Update status at most every 250k vertices or 15 seconds
                            if (verticesWritten > nextPrintVertices
                                    && System.currentTimeMillis() > nextPrintMsecs) {
                                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                        "saveVertices: Saved " + verticesWritten + " out of "
                                                + partition.getVertexCount() + " partition vertices, "
                                                + "on partition " + partitionIndex
                                                + " out of " + numPartitions);
                                nextPrintMsecs = System.currentTimeMillis() + 15000;
                                nextPrintVertices = verticesWritten + 250000;
                            }

                            if (verticesWritten >= nextUpdateProgressVertices) {
                                WorkerProgress.get().addVerticesStored(VERTICES_TO_UPDATE_PROGRESS);
                                nextUpdateProgressVertices += VERTICES_TO_UPDATE_PROGRESS;
                            }
                        }
                        getPartitionStore().putPartition(partition);
                        ++partitionIndex;
                        WorkerProgress.get().addVerticesStored(verticesWritten % VERTICES_TO_UPDATE_PROGRESS);
                        WorkerProgress.get().incrementPartitionsStored();
                    }
                    vertexWriter.close(getContext()); // the temp results are saved now
                    return null;
                }
            };
        }
    };
    ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "save-vertices-%d", getContext());

    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveVertices: Done saving vertices.");
    // YARN: must complete the commit of the "task" output ourselves; Hadoop isn't there.
    if (getConfiguration().isPureYarnJob() && getConfiguration().getVertexOutputFormatClass() != null) {
        try {
            OutputCommitter outputCommitter = vertexOutputFormat.getOutputCommitter(getContext());
            if (outputCommitter.needsTaskCommit(getContext())) {
                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                        "OutputCommitter: committing task output.");
                // transfer from temp dirs to "task commit" dirs to prep for
                // the master's OutputCommitter#commitJob(context) call to finish.
                outputCommitter.commitTask(getContext());
            }
        } catch (InterruptedException ie) {
            LOG.error("Interrupted while attempting to obtain OutputCommitter.", ie);
        } catch (IOException ioe) {
            LOG.error("Master task's attempt to commit output has FAILED.", ioe);
        }
    }
}
From source file:org.apache.giraph.worker.BspServiceSource.java
License:Apache License
/**
 * Save the edges using the user-defined EdgeOutputFormat from our
 * vertexArray based on the split.
 *
 * @throws InterruptedException
 */
private void saveEdges() throws IOException, InterruptedException {
    final ImmutableClassesGiraphConfiguration<I, V, E> conf = getConfiguration();

    if (conf.getEdgeOutputFormatClass() == null) {
        LOG.warn("saveEdges: " + GiraphConstants.EDGE_OUTPUT_FORMAT_CLASS
                + "Make sure that the EdgeOutputFormat is not required.");
        return;
    }

    final int numPartitions = getPartitionStore().getNumPartitions();
    int numThreads = Math.min(conf.getNumOutputThreads(), numPartitions);
    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
            "saveEdges: Starting to save the edges using " + numThreads + " threads");
    final EdgeOutputFormat<I, V, E> edgeOutputFormat = conf.createWrappedEdgeOutputFormat();

    final Queue<Integer> partitionIdQueue = (numPartitions == 0)
            ? new LinkedList<Integer>()
            : new ArrayBlockingQueue<Integer>(numPartitions);
    Iterables.addAll(partitionIdQueue, getPartitionStore().getPartitionIds());

    CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
        @Override
        public Callable<Void> newCallable(int callableId) {
            return new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    EdgeWriter<I, V, E> edgeWriter = edgeOutputFormat.createEdgeWriter(getContext());
                    edgeWriter.setConf(conf);
                    edgeWriter.initialize(getContext());
                    long nextPrintVertices = 0;
                    long nextPrintMsecs = System.currentTimeMillis() + 15000;
                    int partitionIndex = 0;
                    int numPartitions = getPartitionStore().getNumPartitions();
                    while (!partitionIdQueue.isEmpty()) {
                        Integer partitionId = partitionIdQueue.poll();
                        if (partitionId == null) {
                            break;
                        }

                        Partition<I, V, E> partition = getPartitionStore().getOrCreatePartition(partitionId);
                        long vertices = 0;
                        long edges = 0;
                        long partitionEdgeCount = partition.getEdgeCount();
                        for (Vertex<I, V, E> vertex : partition) {
                            for (Edge<I, E> edge : vertex.getEdges()) {
                                edgeWriter.writeEdge(vertex.getId(), vertex.getValue(), edge);
                                ++edges;
                            }
                            ++vertices;

                            // Update status at most every 250k vertices or 15 seconds
                            if (vertices > nextPrintVertices
                                    && System.currentTimeMillis() > nextPrintMsecs) {
                                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                        "saveEdges: Saved " + edges + " edges out of "
                                                + partitionEdgeCount + " partition edges, on partition "
                                                + partitionIndex + " out of " + numPartitions);
                                nextPrintMsecs = System.currentTimeMillis() + 15000;
                                nextPrintVertices = vertices + 250000;
                            }
                        }
                        getPartitionStore().putPartition(partition);
                        ++partitionIndex;
                    }
                    edgeWriter.close(getContext()); // the temp results are saved now
                    return null;
                }
            };
        }
    };
    ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "save-vertices-%d", getContext());

    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveEdges: Done saving edges.");
    // YARN: must complete the commit of the "task" output ourselves; Hadoop isn't there.
    if (conf.isPureYarnJob() && conf.getVertexOutputFormatClass() != null) {
        try {
            OutputCommitter outputCommitter = edgeOutputFormat.getOutputCommitter(getContext());
            if (outputCommitter.needsTaskCommit(getContext())) {
                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                        "OutputCommitter: committing task output.");
                // transfer from temp dirs to "task commit" dirs to prep for
                // the master's OutputCommitter#commitJob(context) call to finish.
                outputCommitter.commitTask(getContext());
            }
        } catch (InterruptedException ie) {
            LOG.error("Interrupted while attempting to obtain OutputCommitter.", ie);
        } catch (IOException ioe) {
            LOG.error("Master task's attempt to commit output has FAILED.", ioe);
        }
    }
}
From source file:org.apache.giraph.worker.BspServiceWorkerForCohesiveSubgraph.java
License:Apache License
/**
 * Save the vertices using the user-defined VertexOutputFormat from our
 * vertexArray based on the split.
 *
 * @param numLocalVertices Number of local vertices
 * @throws InterruptedException
 */
private void saveVertices(long numLocalVertices) throws IOException, InterruptedException {
    if (getConfiguration().getVertexOutputFormatClass() == null) {
        LOG.warn("saveVertices: " + GiraphConstants.VERTEX_OUTPUT_FORMAT_CLASS
                + " not specified -- there will be no saved output");
        return;
    }
    if (getConfiguration().doOutputDuringComputation()) {
        if (LOG.isInfoEnabled()) {
            LOG.info("saveVertices: The option for doing output during "
                    + "computation is selected, so there will be no saving of the "
                    + "output in the end of application");
        }
        return;
    }

    int numThreads = Math.min(getConfiguration().getNumOutputThreads(),
            getPartitionStore().getNumPartitions());
    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
            "saveVertices: Starting to save " + numLocalVertices + " vertices "
                    + "using " + numThreads + " threads");
    final VertexOutputFormat<I, V, E> vertexOutputFormat = getConfiguration().createVertexOutputFormat();

    CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
        @Override
        public Callable<Void> newCallable(int callableId) {
            return new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    VertexWriter<I, V, E> vertexWriter = vertexOutputFormat.createVertexWriter(getContext());
                    vertexWriter.setConf(
                            (ImmutableClassesGiraphConfiguration<I, V, E, Writable>) getConfiguration());
                    vertexWriter.initialize(getContext());
                    long verticesWritten = 0;
                    long nextPrintVertices = 0;
                    long nextPrintMsecs = System.currentTimeMillis() + 15000;
                    int partitionIndex = 0;
                    int numPartitions = getPartitionStore().getNumPartitions();
                    for (Integer partitionId : getPartitionStore().getPartitionIds()) {
                        Partition<I, V, E, M> partition = getPartitionStore().getPartition(partitionId);
                        if (getConfiguration().getBoolean("giraph.ktruss.subgraph", false)) {
                            /**
                             * Special for the ktruss output
                             */
                            BasicGraphStoreInterface gs = (BasicGraphStoreInterface) (partition);
                            for (BasicVertex rv : gs.getLocalVertex()) {
                                Vertex<I, V, E, M> vertex = getConfiguration().createVertex();

                                List<Edge<I, E>> edges = Lists.newLinkedList();
                                for (BasicEdge nb : rv.getNeighbors()) {
                                    edges.add(EdgeFactory.create(((I) new IntWritable(nb.getTargetId())),
                                            ((E) NullWritable.get())));
                                }
                                vertex.initialize((I) (new IntWritable(rv.getId())),
                                        ((V) new IntWritable(0)), edges);

                                vertexWriter.writeVertex(vertex);
                                ++verticesWritten;

                                // Update status at most every 250k vertices or 15 seconds
                                if (verticesWritten > nextPrintVertices
                                        && System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                            "saveVertices: Saved " + verticesWritten + " out of "
                                                    + partition.getVertexCount() + " partition vertices, "
                                                    + "on partition " + partitionIndex
                                                    + " out of " + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = verticesWritten + 250000;
                                }
                            }
                        } else {
                            for (Vertex<I, V, E, M> vertex : partition) {
                                vertexWriter.writeVertex(vertex);
                                ++verticesWritten;

                                // Update status at most every 250k vertices or 15 seconds
                                if (verticesWritten > nextPrintVertices
                                        && System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                            "saveVertices: Saved " + verticesWritten + " out of "
                                                    + partition.getVertexCount() + " partition vertices, "
                                                    + "on partition " + partitionIndex
                                                    + " out of " + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = verticesWritten + 250000;
                                }
                            }
                        }
                        ++partitionIndex;
                    }
                    vertexWriter.close(getContext()); // the temp results are saved now
                    return null;
                }
            };
        }
    };
    ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "save-vertices-%d", getContext());

    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveVertices: Done saving vertices.");
    // YARN: must complete the commit of the "task" output ourselves; Hadoop isn't there.
    if (getConfiguration().isPureYarnJob() && getConfiguration().getVertexOutputFormatClass() != null) {
        try {
            OutputCommitter outputCommitter = vertexOutputFormat.getOutputCommitter(getContext());
            if (outputCommitter.needsTaskCommit(getContext())) {
                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                        "OutputCommitter: committing task output.");
                // transfer from temp dirs to "task commit" dirs to prep for
                // the master's OutputCommitter#commitJob(context) call to finish.
                outputCommitter.commitTask(getContext());
            }
        } catch (InterruptedException ie) {
            LOG.error("Interrupted while attempting to obtain OutputCommitter.", ie);
        } catch (IOException ioe) {
            LOG.error("Master task's attempt to commit output has FAILED.", ioe);
        }
    }
}
From source file:org.apache.hcatalog.data.transfer.impl.HCatOutputFormatWriter.java
License:Apache License
@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {
    int id = sp.getId();
    setVarsInConf(id);
    HCatOutputFormat outFormat = new HCatOutputFormat();
    TaskAttemptContext cntxt = HCatHadoopShims.Instance.get().createTaskAttemptContext(conf,
            new TaskAttemptID(HCatHadoopShims.Instance.get().createTaskID(), id));
    OutputCommitter committer = null;
    RecordWriter<WritableComparable<?>, HCatRecord> writer;
    try {
        committer = outFormat.getOutputCommitter(cntxt);
        committer.setupTask(cntxt);
        writer = outFormat.getRecordWriter(cntxt);
        while (recordItr.hasNext()) {
            HCatRecord rec = recordItr.next();
            writer.write(null, rec);
        }
        writer.close(cntxt);
        if (committer.needsTaskCommit(cntxt)) {
            committer.commitTask(cntxt);
        }
    } catch (IOException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    } catch (InterruptedException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    }
}
From source file:org.apache.hcatalog.mapreduce.FileRecordWriterContainer.java
License:Apache License
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    Reporter reporter = InternalUtil.createReporter(context);
    if (dynamicPartitioningUsed) {
        for (org.apache.hadoop.mapred.RecordWriter<? super WritableComparable<?>, ? super Writable> bwriter : baseDynamicWriters
                .values()) {
            // We are in RecordWriter.close(), so it makes sense that the context
            // would be TaskInputOutput.
            bwriter.close(reporter);
        }
        for (Map.Entry<String, org.apache.hadoop.mapred.OutputCommitter> entry : baseDynamicCommitters
                .entrySet()) {
            org.apache.hadoop.mapred.TaskAttemptContext currContext = dynamicContexts.get(entry.getKey());
            OutputCommitter baseOutputCommitter = entry.getValue();
            if (baseOutputCommitter.needsTaskCommit(currContext)) {
                baseOutputCommitter.commitTask(currContext);
            }
        }
    } else {
        getBaseRecordWriter().close(reporter);
    }
}
From source file:org.apache.hive.hcatalog.data.transfer.impl.HCatOutputFormatWriter.java
License:Apache License
@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {
    int id = sp.getId();
    setVarsInConf(id);
    HCatOutputFormat outFormat = new HCatOutputFormat();
    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf,
            new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
    OutputCommitter committer = null;
    RecordWriter<WritableComparable<?>, HCatRecord> writer;
    try {
        committer = outFormat.getOutputCommitter(cntxt);
        committer.setupTask(cntxt);
        writer = outFormat.getRecordWriter(cntxt);
        while (recordItr.hasNext()) {
            HCatRecord rec = recordItr.next();
            writer.write(null, rec);
        }
        writer.close(cntxt);
        if (committer.needsTaskCommit(cntxt)) {
            committer.commitTask(cntxt);
        }
    } catch (IOException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    } catch (InterruptedException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    }
}