Example usage for org.apache.hadoop.mapreduce OutputCommitter commitTask

List of usage examples for org.apache.hadoop.mapreduce OutputCommitter commitTask

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce OutputCommitter commitTask.

Prototype

public abstract void commitTask(TaskAttemptContext taskContext) throws IOException;

Document

Promotes the task's temporary output to its final output location.
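
Because commitTask is abstract, the concrete behavior comes from whatever committer the OutputFormat supplies (FileOutputFormat, for instance, returns a FileOutputCommitter that moves the task attempt directory into place). As orientation for the examples below, here is a minimal sketch of where commitTask sits in the task-side commit protocol; the class and method names are hypothetical, but the Hadoop calls are real:

import java.io.IOException;

import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical helper showing the usual task-side sequence around commitTask.
public class CommitTaskSketch {
    public static <K, V> void writeAndCommit(OutputFormat<K, V> format, TaskAttemptContext context,
            K key, V value) throws IOException, InterruptedException {
        OutputCommitter committer = format.getOutputCommitter(context);
        committer.setupTask(context); // announce the task attempt
        RecordWriter<K, V> writer = format.getRecordWriter(context);
        try {
            writer.write(key, value); // records land in a temporary location
        } finally {
            writer.close(context);
        }
        if (committer.needsTaskCommit(context)) { // some committers have nothing to promote
            committer.commitTask(context); // promote temporary output to its final location
        }
    }
}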

Usage

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License:Apache License

private void doCommitTask(TaskAttemptContext context, OutputCommitter committer) throws IOException {
    if (committer.needsTaskCommit(context)) {
        committer.commitTask(context);
    }
}
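
The needsTaskCommit guard is the recommended pattern: a committer such as FileOutputCommitter returns false when the task attempt produced no output, so guarding the call avoids committing attempts that have nothing to promote.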

From source file:com.asakusafw.testdriver.file.FileOutputFormatDriver.java

License:Apache License

@Override
public void close() throws IOException {
    LOG.debug("Committing output results: {}", format.getClass().getName());
    try {
        writer.close(context);
        OutputCommitter committer = format.getOutputCommitter(context);
        committer.commitTask(context);
        committer.commitJob(context);
    } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }
}
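
Note that this close() method commits both the task and the job. That is appropriate for a single-process test driver; in a cluster run, commitTask is called once per successful task attempt while commitJob is called exactly once by the framework after all tasks complete.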

From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java

License:Open Source License

/**
 * Creates an lzo file with random data.
 *
 * @param outputDir Output directory.
 * @param fs File system we're using.
 * @param attemptContext Task attempt context, contains task id etc. 
 * @throws IOException
 * @throws InterruptedException
 */
private byte[] createTestInput(Path outputDir, FileSystem fs, TaskAttemptContext attemptContext,
        int charsToOutput) throws IOException, InterruptedException {

    TextOutputFormat<Text, Text> output = new TextOutputFormat<Text, Text>();
    RecordWriter<Text, Text> rw = null;

    md5.reset();

    try {
        rw = output.getRecordWriter(attemptContext);

        char[] chars = "abcdefghijklmnopqrstuvwxyz\u00E5\u00E4\u00F6".toCharArray();

        Random r = new Random(System.currentTimeMillis());
        Text key = new Text();
        Text value = new Text();
        int charsMax = chars.length - 1;
        for (int i = 0; i < charsToOutput;) {
            i += fillText(chars, r, charsMax, key);
            i += fillText(chars, r, charsMax, value);
            rw.write(key, value);
            md5.update(key.getBytes(), 0, key.getLength());
            // text output format writes tab between the key and value
            md5.update("\t".getBytes("UTF-8"));
            md5.update(value.getBytes(), 0, value.getLength());
        }
    } finally {
        if (rw != null) {
            rw.close(attemptContext);
            OutputCommitter committer = output.getOutputCommitter(attemptContext);
            committer.commitTask(attemptContext);
            committer.cleanupJob(attemptContext);
        }
    }

    byte[] result = md5.digest();
    md5.reset();
    return result;
}
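
This test finishes with cleanupJob, which newer Hadoop releases deprecate in favor of commitJob (on success) and abortJob (on failure); the task-level commitTask call is unchanged either way.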

From source file:com.marklogic.contentpump.LocalJobRunner.java

License:Apache License

/**
 * Run the job.  Get the input splits, create map tasks, and submit them to
 * the thread pool if there is one; otherwise, run the tasks one by one.
 * 
 * @param <INKEY>
 * @param <INVALUE>
 * @param <OUTKEY>
 * @param <OUTVALUE>
 * @throws Exception
 */
@SuppressWarnings("unchecked")
public <INKEY, INVALUE, OUTKEY, OUTVALUE, T extends org.apache.hadoop.mapreduce.InputSplit> void run()
        throws Exception {
    Configuration conf = job.getConfiguration();
    InputFormat<INKEY, INVALUE> inputFormat = (InputFormat<INKEY, INVALUE>) ReflectionUtils
            .newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = inputFormat.getSplits(job);
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);

    // sort the splits into order based on size, so that the biggest
    // goes first
    Arrays.sort(array, new SplitLengthComparator());
    OutputFormat<OUTKEY, OUTVALUE> outputFormat = (OutputFormat<OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(job.getOutputFormatClass(), conf);
    Class<? extends Mapper<?, ?, ?, ?>> mapperClass = job.getMapperClass();
    Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(mapperClass, conf);
    try {
        outputFormat.checkOutputSpecs(job);
    } catch (Exception ex) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error checking output specification: ", ex);
        } else {
            LOG.error("Error checking output specification: ");
            LOG.error(ex.getMessage());
        }
        return;
    }
    conf = job.getConfiguration();
    progress = new AtomicInteger[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        progress[i] = new AtomicInteger();
    }
    Monitor monitor = new Monitor();
    monitor.start();
    reporter = new ContentPumpReporter();
    List<Future<Object>> taskList = new ArrayList<Future<Object>>();
    for (int i = 0; i < array.length; i++) {
        InputSplit split = array[i];
        if (pool != null) {
            LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE> task = new LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE>(
                    inputFormat, outputFormat, conf, i, split, reporter, progress[i]);
            availableThreads = assignThreads(i, array.length);
            Class<? extends Mapper<?, ?, ?, ?>> runtimeMapperClass = job.getMapperClass();
            if (availableThreads > 1 && availableThreads != threadsPerSplit) {
                // possible runtime adjustment
                if (runtimeMapperClass != (Class) MultithreadedMapper.class) {
                    runtimeMapperClass = (Class<? extends Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>>) cmd
                            .getRuntimeMapperClass(job, mapperClass, threadsPerSplit, availableThreads);
                }
                if (runtimeMapperClass != mapperClass) {
                    task.setMapperClass(runtimeMapperClass);
                }
                if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                    task.setThreadCount(availableThreads);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Thread Count for Split#" + i + " : " + availableThreads);
                    }
                }
            }

            if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                synchronized (pool) {
                    taskList.add(pool.submit(task));
                    pool.wait();
                }
            } else {
                pool.submit(task);
            }
        } else { // single-threaded
            JobID jid = new JobID();
            TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, i);
            TaskAttemptID taskAttemptId = new TaskAttemptID(taskId, 0);
            TaskAttemptContext context = ReflectionUtil.createTaskAttemptContext(conf, taskAttemptId);
            RecordReader<INKEY, INVALUE> reader = inputFormat.createRecordReader(split, context);
            RecordWriter<OUTKEY, OUTVALUE> writer = outputFormat.getRecordWriter(context);
            OutputCommitter committer = outputFormat.getOutputCommitter(context);
            TrackingRecordReader trackingReader = new TrackingRecordReader(reader, progress[i]);

            Mapper.Context mapperContext = ReflectionUtil.createMapperContext(mapper, conf, taskAttemptId,
                    trackingReader, writer, committer, reporter, split);

            trackingReader.initialize(split, mapperContext);

            // no thread pool (only 1 thread specified)
            Class<? extends Mapper<?, ?, ?, ?>> mapClass = job.getMapperClass();
            mapperContext.getConfiguration().setClass(CONF_MAPREDUCE_JOB_MAP_CLASS, mapClass, Mapper.class);
            mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils.newInstance(mapClass,
                    mapperContext.getConfiguration());
            mapper.run(mapperContext);
            trackingReader.close();
            writer.close(mapperContext);
            committer.commitTask(context);
        }
    }
    // wait till all tasks are done
    if (pool != null) {
        for (Future<Object> f : taskList) {
            f.get();
        }
        pool.shutdown();
        while (!pool.awaitTermination(1, TimeUnit.DAYS))
            ;
        jobComplete.set(true);
    }
    monitor.interrupt();
    monitor.join(1000);

    // report counters
    Iterator<CounterGroup> groupIt = reporter.counters.iterator();
    while (groupIt.hasNext()) {
        CounterGroup group = groupIt.next();
        LOG.info(group.getDisplayName() + ": ");
        Iterator<Counter> counterIt = group.iterator();
        while (counterIt.hasNext()) {
            Counter counter = counterIt.next();
            LOG.info(counter.getDisplayName() + ": " + counter.getValue());
        }
    }
    LOG.info("Total execution time: " + (System.currentTimeMillis() - startTime) / 1000 + " sec");
}
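
In the single-threaded branch the runner drives the whole task lifecycle by hand: it builds the reader, writer, and committer, runs the mapper, closes reader and writer, and only then calls commitTask to publish the attempt's output. Skipping that final call would leave the output stranded in the task attempt's temporary location.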

From source file:com.scaleoutsoftware.soss.hserver.hadoop.MapperWrapperMapreduce.java

License:Apache License

/**
 * Runs the mapper for a single split.
 *
 * @param mapOutputAccumulator mapOutputAccumulator to use
 * @param split                split to run on
 */

@Override
@SuppressWarnings("unchecked")
public void runSplit(MapOutputAccumulator<OUTKEY, OUTVALUE> mapOutputAccumulator, Object split, int splitIndex)
        throws IOException, ClassNotFoundException, InterruptedException {

    TaskAttemptID taskAttemptId = hadoopVersionSpecificCode.createTaskAttemptId(jobId, true, splitIndex);
    //Setup task ID info
    TaskAttemptContext taskContext = hadoopVersionSpecificCode.createTaskAttemptContext(configuration,
            taskAttemptId);

    InputFormat inputFormat = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), configuration);

    //Create RecordReader
    org.apache.hadoop.mapreduce.RecordReader<INKEY, INVALUE> input = inputFormat
            .createRecordReader((InputSplit) split, taskContext);

    //Make a mapper
    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper;
    try {
        mapper = (org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) mapperConstructor
                .newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    org.apache.hadoop.mapreduce.RecordWriter output;
    OutputCommitter committer = null;
    if (mapOnlyJob) {
        OutputFormat outputFormat = ReflectionUtils.newInstance(jobContext.getOutputFormatClass(),
                configuration);
        output = (org.apache.hadoop.mapreduce.RecordWriter<OUTKEY, OUTVALUE>) outputFormat
                .getRecordWriter(taskContext);
        committer = outputFormat.getOutputCommitter(taskContext);
        committer.setupTask(taskContext);
    } else {
        output = new MapOutputCollector<OUTKEY, OUTVALUE>(mapOutputAccumulator);
    }

    input.initialize((InputSplit) split, taskContext);

    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>.Context mapperContext = hadoopVersionSpecificCode
            .getMapperContext(configuration, taskAttemptId, input, output);
    mapper.run(mapperContext);

    input.close();

    output.close(mapperContext);

    if (mapOnlyJob && committer != null) {
        committer.commitTask(taskContext);
    }
}
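
commitTask is only reached on the map-only path here: when reducers are present, map output goes to the shuffle through the MapOutputCollector and there is no task output for a committer to promote, so no committer is created at all.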

From source file:cz.seznam.euphoria.hadoop.output.TestDataSinkOutputFormat.java

License:Apache License

@Test
@SuppressWarnings("unchecked")
/**
 * Test that {@code ListDataSink} can be used in place of Hadoop {@code OutputFormat}.
 */
public void testDataSink() throws Exception {
    DummySink sink = new DummySink();
    Configuration conf = new Configuration();
    DataSinkOutputFormat.configure(conf, sink);

    // mock the instances we will need
    TaskAttemptContext first = mockContext(conf, 0);
    TaskAttemptContext second = mockContext(conf, 1);

    // instantiate the output format
    DataSinkOutputFormat<Long> format = DataSinkOutputFormat.class.newInstance();

    // validate
    format.checkOutputSpecs(first);

    // create record writer for the first partition
    RecordWriter<NullWritable, Long> writer = format.getRecordWriter(first);
    writer.write(NullWritable.get(), 2L);
    writer.close(first);
    format.getOutputCommitter(first).commitTask(first);

    // now the second partition, we need to create new instance of output format
    format = DataSinkOutputFormat.class.newInstance();
    // validate
    format.checkOutputSpecs(second);

    // create record writer for the second partition
    writer = format.getRecordWriter(second);
    writer.write(NullWritable.get(), 4L);
    writer.close(second);
    OutputCommitter committer = format.getOutputCommitter(second);
    committer.commitTask(second);

    // and now validate what was written
    assertFalse(DummySink.isCommitted);

    committer.commitJob(second);
    assertTrue(DummySink.isCommitted);

    assertTrue(DummySink.outputs.isEmpty());
    assertEquals(2, DummySink.committed.size());

    assertEquals(Arrays.asList(2L), DummySink.committed.get(0));
    assertEquals(Arrays.asList(4L), DummySink.committed.get(1));
}
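
The assertions spell out the two-phase contract: after commitTask the writes are staged but DummySink.isCommitted is still false, and only the single commitJob call makes the committed output visible as a whole.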

From source file:org.apache.giraph.io.internal.WrappedEdgeOutputFormat.java

License:Apache License

@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {

    final OutputCommitter outputCommitter = originalOutputFormat
            .getOutputCommitter(HadoopUtils.makeTaskAttemptContext(getConf(), context));

    return new OutputCommitter() {
        @Override
        public void setupJob(JobContext context) throws IOException {
            outputCommitter.setupJob(HadoopUtils.makeJobContext(getConf(), context));
        }

        @Override
        public void setupTask(TaskAttemptContext context) throws IOException {
            outputCommitter.setupTask(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
            return outputCommitter.needsTaskCommit(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public void commitTask(TaskAttemptContext context) throws IOException {
            outputCommitter.commitTask(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public void abortTask(TaskAttemptContext context) throws IOException {
            outputCommitter.abortTask(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public void cleanupJob(JobContext context) throws IOException {
            outputCommitter.cleanupJob(HadoopUtils.makeJobContext(getConf(), context));
        }

        /*if_not[HADOOP_NON_COMMIT_JOB]*/
        @Override
        public void commitJob(JobContext context) throws IOException {
            outputCommitter.commitJob(HadoopUtils.makeJobContext(getConf(), context));
        }

        @Override
        public void abortJob(JobContext context, JobStatus.State state) throws IOException {
            outputCommitter.abortJob(HadoopUtils.makeJobContext(getConf(), context), state);
        }
        /*end[HADOOP_NON_COMMIT_JOB]*/
    };
}

From source file:org.apache.giraph.io.internal.WrappedVertexOutputFormat.java

License:Apache License

@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
    final OutputCommitter outputCommitter = originalOutputFormat
            .getOutputCommitter(HadoopUtils.makeTaskAttemptContext(getConf(), context));
    return new OutputCommitter() {
        @Override
        public void setupJob(JobContext context) throws IOException {
            outputCommitter.setupJob(HadoopUtils.makeJobContext(getConf(), context));
        }

        @Override
        public void setupTask(TaskAttemptContext context) throws IOException {
            outputCommitter.setupTask(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
            return outputCommitter.needsTaskCommit(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public void commitTask(TaskAttemptContext context) throws IOException {
            outputCommitter.commitTask(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public void abortTask(TaskAttemptContext context) throws IOException {
            outputCommitter.abortTask(HadoopUtils.makeTaskAttemptContext(getConf(), context));
        }

        @Override
        public void cleanupJob(JobContext context) throws IOException {
            outputCommitter.cleanupJob(HadoopUtils.makeJobContext(getConf(), context));
        }

        /*if_not[HADOOP_NON_COMMIT_JOB]*/
        @Override
        public void commitJob(JobContext context) throws IOException {
            outputCommitter.commitJob(HadoopUtils.makeJobContext(getConf(), context));
        }

        @Override
        public void abortJob(JobContext context, JobStatus.State state) throws IOException {
            outputCommitter.abortJob(HadoopUtils.makeJobContext(getConf(), context), state);
        }
        /*end[HADOOP_NON_COMMIT_JOB]*/
    };
}
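
Both Giraph wrappers (this one and WrappedEdgeOutputFormat above) use the same delegation pattern: every overridden method rebuilds the context via HadoopUtils.makeTaskAttemptContext or makeJobContext so the wrapped committer sees Giraph's configuration, then forwards the call, commitTask included.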

From source file:org.apache.giraph.worker.BspServiceSource.java

License:Apache License

/**
 * Save the vertices using the user-defined VertexOutputFormat from our
 * vertexArray based on the split.
 *
 * @param numLocalVertices Number of local vertices
 * @throws InterruptedException
 */
private void saveVertices(long numLocalVertices) throws IOException, InterruptedException {
    ImmutableClassesGiraphConfiguration<I, V, E> conf = getConfiguration();

    if (conf.getVertexOutputFormatClass() == null) {
        LOG.warn("saveVertices: " + GiraphConstants.VERTEX_OUTPUT_FORMAT_CLASS
                + " not specified -- there will be no saved output");
        return;
    }
    if (conf.doOutputDuringComputation()) {
        if (LOG.isInfoEnabled()) {
            LOG.info("saveVertices: The option for doing output during "
                    + "computation is selected, so there will be no saving of the "
                    + "output in the end of application");
        }
        return;
    }

    final int numPartitions = getPartitionStore().getNumPartitions();
    int numThreads = Math.min(getConfiguration().getNumOutputThreads(), numPartitions);
    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveVertices: Starting to save "
            + numLocalVertices + " vertices " + "using " + numThreads + " threads");
    final VertexOutputFormat<I, V, E> vertexOutputFormat = getConfiguration().createWrappedVertexOutputFormat();

    final Queue<Integer> partitionIdQueue = (numPartitions == 0) ? new LinkedList<Integer>()
            : new ArrayBlockingQueue<Integer>(numPartitions);
    Iterables.addAll(partitionIdQueue, getPartitionStore().getPartitionIds());

    long verticesToStore = 0;
    PartitionStore<I, V, E> partitionStore = getPartitionStore();
    for (int partitionId : partitionStore.getPartitionIds()) {
        Partition<I, V, E> partition = partitionStore.getOrCreatePartition(partitionId);
        verticesToStore += partition.getVertexCount();
        partitionStore.putPartition(partition);
    }
    WorkerProgress.get().startStoring(verticesToStore, getPartitionStore().getNumPartitions());

    CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
        @Override
        public Callable<Void> newCallable(int callableId) {
            return new Callable<Void>() {
                /** How often to update WorkerProgress */
                private static final long VERTICES_TO_UPDATE_PROGRESS = 100000;

                @Override
                public Void call() throws Exception {
                    VertexWriter<I, V, E> vertexWriter = vertexOutputFormat.createVertexWriter(getContext());
                    vertexWriter.setConf(getConfiguration());
                    vertexWriter.initialize(getContext());
                    long nextPrintVertices = 0;
                    long nextUpdateProgressVertices = VERTICES_TO_UPDATE_PROGRESS;
                    long nextPrintMsecs = System.currentTimeMillis() + 15000;
                    int partitionIndex = 0;
                    int numPartitions = getPartitionStore().getNumPartitions();
                    while (!partitionIdQueue.isEmpty()) {
                        Integer partitionId = partitionIdQueue.poll();
                        if (partitionId == null) {
                            break;
                        }

                        Partition<I, V, E> partition = getPartitionStore().getOrCreatePartition(partitionId);
                        long verticesWritten = 0;
                        for (Vertex<I, V, E> vertex : partition) {
                            vertexWriter.writeVertex(vertex);
                            ++verticesWritten;

                            // Update status at most every 250k vertices or 15 seconds
                            if (verticesWritten > nextPrintVertices
                                    && System.currentTimeMillis() > nextPrintMsecs) {
                                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                        "saveVertices: Saved " + verticesWritten + " out of "
                                                + partition.getVertexCount() + " partition vertices, "
                                                + "on partition " + partitionIndex + " out of "
                                                + numPartitions);
                                nextPrintMsecs = System.currentTimeMillis() + 15000;
                                nextPrintVertices = verticesWritten + 250000;
                            }

                            if (verticesWritten >= nextUpdateProgressVertices) {
                                WorkerProgress.get().addVerticesStored(VERTICES_TO_UPDATE_PROGRESS);
                                nextUpdateProgressVertices += VERTICES_TO_UPDATE_PROGRESS;
                            }
                        }
                        getPartitionStore().putPartition(partition);
                        ++partitionIndex;
                        WorkerProgress.get().addVerticesStored(verticesWritten % VERTICES_TO_UPDATE_PROGRESS);
                        WorkerProgress.get().incrementPartitionsStored();
                    }
                    vertexWriter.close(getContext()); // the temp results are saved now
                    return null;
                }
            };
        }
    };
    ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "save-vertices-%d", getContext());

    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveVertices: Done saving vertices.");
    // YARN: we must commit the "task" output ourselves, since Hadoop isn't there to do it.
    if (getConfiguration().isPureYarnJob() && getConfiguration().getVertexOutputFormatClass() != null) {
        try {
            OutputCommitter outputCommitter = vertexOutputFormat.getOutputCommitter(getContext());
            if (outputCommitter.needsTaskCommit(getContext())) {
                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                        "OutputCommitter: committing task output.");
                // transfer from temp dirs to "task commit" dirs to prep for
                // the master's OutputCommitter#commitJob(context) call to finish.
                outputCommitter.commitTask(getContext());
            }
        } catch (InterruptedException ie) {
            LOG.error("Interrupted while attempting to obtain " + "OutputCommitter.", ie);
        } catch (IOException ioe) {
            LOG.error("Master task's attempt to commit output has " + "FAILED.", ioe);
        }
    }
}
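
The commitTask call runs only for pure-YARN jobs: without the Hadoop MapReduce task runner there is nothing else to drive the commit protocol, so the worker promotes its own task output here and leaves the final commitJob to the master.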

From source file:org.apache.giraph.worker.BspServiceSource.java

License:Apache License

/**
 * Save the edges using the user-defined EdgeOutputFormat from our
 * vertexArray based on the split.
 *
 * @throws InterruptedException
 */
private void saveEdges() throws IOException, InterruptedException {
    final ImmutableClassesGiraphConfiguration<I, V, E> conf = getConfiguration();

    if (conf.getEdgeOutputFormatClass() == null) {
        LOG.warn("saveEdges: " + GiraphConstants.EDGE_OUTPUT_FORMAT_CLASS
                + "Make sure that the EdgeOutputFormat is not required.");
        return;
    }

    final int numPartitions = getPartitionStore().getNumPartitions();
    int numThreads = Math.min(conf.getNumOutputThreads(), numPartitions);
    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
            "saveEdges: Starting to save the edges using " + numThreads + " threads");
    final EdgeOutputFormat<I, V, E> edgeOutputFormat = conf.createWrappedEdgeOutputFormat();

    final Queue<Integer> partitionIdQueue = (numPartitions == 0) ? new LinkedList<Integer>()
            : new ArrayBlockingQueue<Integer>(numPartitions);
    Iterables.addAll(partitionIdQueue, getPartitionStore().getPartitionIds());

    CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
        @Override
        public Callable<Void> newCallable(int callableId) {
            return new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    EdgeWriter<I, V, E> edgeWriter = edgeOutputFormat.createEdgeWriter(getContext());
                    edgeWriter.setConf(conf);
                    edgeWriter.initialize(getContext());

                    long nextPrintVertices = 0;
                    long nextPrintMsecs = System.currentTimeMillis() + 15000;
                    int partitionIndex = 0;
                    int numPartitions = getPartitionStore().getNumPartitions();
                    while (!partitionIdQueue.isEmpty()) {
                        Integer partitionId = partitionIdQueue.poll();
                        if (partitionId == null) {
                            break;
                        }

                        Partition<I, V, E> partition = getPartitionStore().getOrCreatePartition(partitionId);
                        long vertices = 0;
                        long edges = 0;
                        long partitionEdgeCount = partition.getEdgeCount();
                        for (Vertex<I, V, E> vertex : partition) {
                            for (Edge<I, E> edge : vertex.getEdges()) {
                                edgeWriter.writeEdge(vertex.getId(), vertex.getValue(), edge);
                                ++edges;
                            }
                            ++vertices;

                            // Update status at most every 250k vertices or 15 seconds
                            if (vertices > nextPrintVertices && System.currentTimeMillis() > nextPrintMsecs) {
                                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                        "saveEdges: Saved " + edges + " edges out of " + partitionEdgeCount
                                                + " partition edges, on partition " + partitionIndex
                                                + " out of " + numPartitions);
                                nextPrintMsecs = System.currentTimeMillis() + 15000;
                                nextPrintVertices = vertices + 250000;
                            }
                        }
                        getPartitionStore().putPartition(partition);
                        ++partitionIndex;
                    }
                    edgeWriter.close(getContext()); // the temp results are saved now
                    return null;
                }
            };
        }
    };
    ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "save-edges-%d", getContext());

    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveEdges: Done saving edges.");
    // YARN: we must commit the "task" output ourselves, since Hadoop isn't there to do it.
    if (conf.isPureYarnJob() && conf.getEdgeOutputFormatClass() != null) {
        try {
            OutputCommitter outputCommitter = edgeOutputFormat.getOutputCommitter(getContext());
            if (outputCommitter.needsTaskCommit(getContext())) {
                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                        "OutputCommitter: committing task output.");
                // transfer from temp dirs to "task commit" dirs to prep for
                // the master's OutputCommitter#commitJob(context) call to finish.
                outputCommitter.commitTask(getContext());
            }
        } catch (InterruptedException ie) {
            LOG.error("Interrupted while attempting to obtain " + "OutputCommitter.", ie);
        } catch (IOException ioe) {
            LOG.error("Master task's attempt to commit output has " + "FAILED.", ioe);
        }
    }
}
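
Taken together, the examples follow one pattern: obtain the committer from the OutputFormat for the current TaskAttemptContext, close the RecordWriter first, optionally check needsTaskCommit, and then call commitTask so the task's temporary output is promoted. Committing the whole job remains a separate, once-per-job step.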