org.apache.giraph.worker.BspServiceSource.java Source code

Introduction

Here is the source code for org.apache.giraph.worker.BspServiceSource.java, a ZooKeeper-based worker-side BSP service for Apache Giraph that extends BspServiceWorker.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.giraph.worker;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.TimeUnit;

import net.iharder.Base64;

import org.apache.commons.lang.SystemUtils;
import org.apache.giraph.bsp.ApplicationState;
import org.apache.giraph.bsp.BspService;
import org.apache.giraph.bsp.CentralizedServiceWorker;
import org.apache.giraph.bsp.CheckpointStatus;
import org.apache.giraph.comm.ServerData;
import org.apache.giraph.comm.WorkerClient;
import org.apache.giraph.comm.WorkerClientRequestProcessor;
import org.apache.giraph.comm.WorkerServer;
import org.apache.giraph.comm.aggregators.WorkerAggregatorRequestProcessor;
import org.apache.giraph.comm.messages.MessageStore;
import org.apache.giraph.comm.messages.queue.AsyncMessageStoreWrapper;
import org.apache.giraph.comm.netty.NettyWorkerAggregatorRequestProcessor;
import org.apache.giraph.comm.netty.NettyWorkerClient;
import org.apache.giraph.comm.netty.NettyWorkerClientRequestProcessor;
import org.apache.giraph.comm.netty.NettyWorkerServer;
import org.apache.giraph.conf.GiraphConstants;
import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.graph.AddressesAndPartitionsWritable;
import org.apache.giraph.graph.FinishedSuperstepStats;
import org.apache.giraph.graph.GlobalStats;
import org.apache.giraph.graph.GraphTaskManager;
import org.apache.giraph.graph.InputSplitEvents;
import org.apache.giraph.graph.InputSplitPaths;
import org.apache.giraph.graph.Vertex;
import org.apache.giraph.graph.VertexEdgeCount;
import org.apache.giraph.io.EdgeOutputFormat;
import org.apache.giraph.io.EdgeWriter;
import org.apache.giraph.io.VertexOutputFormat;
import org.apache.giraph.io.VertexWriter;
import org.apache.giraph.io.superstep_output.SuperstepOutput;
import org.apache.giraph.mapping.translate.TranslateEdge;
import org.apache.giraph.master.MasterInfo;
import org.apache.giraph.master.SuperstepClasses;
import org.apache.giraph.metrics.GiraphMetrics;
import org.apache.giraph.metrics.GiraphTimer;
import org.apache.giraph.metrics.GiraphTimerContext;
import org.apache.giraph.metrics.ResetSuperstepMetricsObserver;
import org.apache.giraph.metrics.SuperstepMetricsRegistry;
import org.apache.giraph.metrics.WorkerSuperstepMetrics;
import org.apache.giraph.partition.Partition;
import org.apache.giraph.partition.PartitionExchange;
import org.apache.giraph.partition.PartitionOwner;
import org.apache.giraph.partition.PartitionStats;
import org.apache.giraph.partition.PartitionStore;
import org.apache.giraph.partition.WorkerGraphPartitioner;
import org.apache.giraph.utils.*;
import org.apache.giraph.zk.BspEvent;
import org.apache.giraph.zk.PredicateLock;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher.Event.EventType;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.data.Stat;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

/**
 * ZooKeeper-based implementation of {@link CentralizedServiceWorker}.
 *
 * @param <I> Vertex id
 * @param <V> Vertex data
 * @param <E> Edge data
 */
@SuppressWarnings("rawtypes")
public class BspServiceSource<I extends WritableComparable, V extends Writable, E extends Writable> extends
        BspServiceWorker<I, V, E> implements CentralizedServiceWorker<I, V, E>, ResetSuperstepMetricsObserver {
    /** Name of gauge for time spent waiting on other workers */
    public static final String TIMER_WAIT_REQUESTS = "wait-requests-us";
    /** Class logger */
    private static final Logger LOG = Logger.getLogger(BspServiceSource.class);
    /** My process health znode */
    private String myHealthZnode;
    /** Worker info */
    private final WorkerInfo workerInfo;
    /** Worker graph partitioner */
    private final WorkerGraphPartitioner<I, V, E> workerGraphPartitioner;
    /** Local Data for each worker */
    private final LocalData<I, V, E, ? extends Writable> localData;
    /** Used to translate Edges during vertex input phase based on localData */
    private final TranslateEdge<I, E> translateEdge;
    /** IPC Client */
    private final WorkerClient<I, V, E> workerClient;
    /** IPC Server */
    private final WorkerServer<I, V, E> workerServer;
    /** Request processor for aggregator requests */
    private final WorkerAggregatorRequestProcessor workerAggregatorRequestProcessor;
    /** Master info */
    private MasterInfo masterInfo = new MasterInfo();
    /** List of workers */
    private List<WorkerInfo> workerInfoList = Lists.newArrayList();
    /** Have the partition exchange children (workers) changed? */
    private final BspEvent partitionExchangeChildrenChanged;

    /** Worker Context */
    private final WorkerContext workerContext;

    /** Handler for aggregators */
    private final WorkerAggregatorHandler globalCommHandler;

    /** Superstep output */
    private final SuperstepOutput<I, V, E> superstepOutput;

    /** array of observers to call back to */
    private final WorkerObserver[] observers;
    /** Writer for worker progress */
    private final WorkerProgressWriter workerProgressWriter;

    // Per-Superstep Metrics
    /** Timer for WorkerContext#postSuperstep */
    private GiraphTimer wcPostSuperstepTimer;
    /** Time spent waiting on requests to finish */
    private GiraphTimer waitRequestsTimer;

    /**
     * Constructor for setting up the worker.
     *
     * @param context Mapper context
     * @param graphTaskManager GraphTaskManager for this compute node
     * @throws IOException
     * @throws InterruptedException
     */
    public BspServiceSource(Mapper<?, ?, ?, ?>.Context context, GraphTaskManager<I, V, E> graphTaskManager)
            throws IOException, InterruptedException {
        super(context, graphTaskManager);
        ImmutableClassesGiraphConfiguration<I, V, E> conf = getConfiguration();
        localData = new LocalData<>(conf);
        translateEdge = getConfiguration().edgeTranslationInstance();
        if (translateEdge != null) {
            translateEdge.initialize(this);
        }
        partitionExchangeChildrenChanged = new PredicateLock(context);
        registerBspEvent(partitionExchangeChildrenChanged);
        workerGraphPartitioner = getGraphPartitionerFactory().createWorkerGraphPartitioner();
        workerInfo = new WorkerInfo();
        workerServer = new NettyWorkerServer<I, V, E>(conf, this, context,
                graphTaskManager.createUncaughtExceptionHandler());
        workerInfo.setInetSocketAddress(workerServer.getMyAddress());
        workerInfo.setTaskId(getTaskPartition());
        workerClient = new NettyWorkerClient<I, V, E>(context, conf, this,
                graphTaskManager.createUncaughtExceptionHandler());

        workerAggregatorRequestProcessor = new NettyWorkerAggregatorRequestProcessor(getContext(), conf, this);

        globalCommHandler = new WorkerAggregatorHandler(this, conf, context);

        workerContext = conf.createWorkerContext();
        workerContext.setWorkerGlobalCommUsage(globalCommHandler);

        superstepOutput = conf.createSuperstepOutput(context);

        if (conf.isJMapHistogramDumpEnabled()) {
            conf.addWorkerObserverClass(JMapHistoDumper.class);
        }
        if (conf.isReactiveJmapHistogramDumpEnabled()) {
            conf.addWorkerObserverClass(ReactiveJMapHistoDumper.class);
        }
        observers = conf.createWorkerObservers();

        WorkerProgress.get().setTaskId(getTaskPartition());
        workerProgressWriter = conf.trackJobProgressOnClient()
                ? new WorkerProgressWriter(graphTaskManager.getJobProgressTracker())
                : null;

        GiraphMetrics.get().addSuperstepResetObserver(this);
    }

    @Override
    public void newSuperstep(SuperstepMetricsRegistry superstepMetrics) {
        waitRequestsTimer = new GiraphTimer(superstepMetrics, TIMER_WAIT_REQUESTS, TimeUnit.MICROSECONDS);
        wcPostSuperstepTimer = new GiraphTimer(superstepMetrics, "worker-context-post-superstep",
                TimeUnit.MICROSECONDS);
    }

    @Override
    public WorkerContext getWorkerContext() {
        return workerContext;
    }

    @Override
    public WorkerObserver[] getWorkerObservers() {
        return observers;
    }

    @Override
    public WorkerClient<I, V, E> getWorkerClient() {
        return workerClient;
    }

    public LocalData<I, V, E, ? extends Writable> getLocalData() {
        return localData;
    }

    public TranslateEdge<I, E> getTranslateEdge() {
        return translateEdge;
    }

    /**
     * Intended to check the health of the node.  For instance, can it ssh,
     * dmesg, etc. For now, does nothing.
     * TODO: Make this check configurable by the user (i.e. search dmesg for
     * problems).
     *
     * @return True if healthy (always in this case).
     */
    public boolean isHealthy() {
        return true;
    }

    /**
     * Load the vertices/edges from input splits. Do this until all the
     * InputSplits have been processed.
     * All workers will try to do as many InputSplits as they can.  The master
     * will monitor progress and stop this once all the InputSplits have been
     * loaded and check-pointed.  Keep track of the last input split path to
     * ensure the input split cache is flushed prior to marking the last input
     * split complete.
     *
     * Use one or more threads to do the loading.
     *
     * @param inputSplitPathList List of input split paths
     * @param inputSplitsCallableFactory Factory for {@link InputSplitsCallable}s
     * @return Statistics of the vertices and edges loaded
     * @throws InterruptedException
     * @throws KeeperException
     */
    private VertexEdgeCount loadInputSplits(List<String> inputSplitPathList,
            CallableFactory<VertexEdgeCount> inputSplitsCallableFactory)
            throws KeeperException, InterruptedException {
        VertexEdgeCount vertexEdgeCount = new VertexEdgeCount();
        // Determine how many threads to use based on the number of input splits
        int maxInputSplitThreads = (inputSplitPathList.size() - 1) / getConfiguration().getMaxWorkers() + 1;
        int numThreads = Math.min(getConfiguration().getNumInputSplitsThreads(), maxInputSplitThreads);
        if (LOG.isInfoEnabled()) {
            LOG.info("loadInputSplits: Using " + numThreads + " thread(s), " + "originally "
                    + getConfiguration().getNumInputSplitsThreads() + " threads(s) for " + inputSplitPathList.size()
                    + " total splits.");
        }

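        // Run the input-split callables on the chosen number of threads and
        // accumulate the per-thread vertex/edge counts.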
        List<VertexEdgeCount> results = ProgressableUtils.getResultsWithNCallables(inputSplitsCallableFactory,
                numThreads, "load-%d", getContext());
        for (VertexEdgeCount result : results) {
            vertexEdgeCount = vertexEdgeCount.incrVertexEdgeCount(result);
        }

        workerClient.waitAllRequests();
        return vertexEdgeCount;
    }

    /**
     * Load the mapping entries from the user-defined
     * {@link org.apache.giraph.io.MappingReader}
     *
     * @return Count of mapping entries loaded
     */
    private long loadMapping() throws KeeperException, InterruptedException {
        List<String> inputSplitPathList = getZkExt().getChildrenExt(mappingInputSplitsPaths.getPath(), false, false,
                true);

        InputSplitPathOrganizer splitOrganizer = new InputSplitPathOrganizer(getZkExt(), inputSplitPathList,
                getWorkerInfo().getHostname(), getConfiguration().useInputSplitLocality());

        MappingInputSplitsCallableFactory<I, V, E, ? extends Writable> mappingInputSplitsCallableFactory = new MappingInputSplitsCallableFactory<>(
                getConfiguration().createWrappedMappingInputFormat(), splitOrganizer, getContext(),
                getConfiguration(), this, getZkExt());

        long entriesLoaded = 0;
        // Determine how many threads to use based on the number of input splits
        int maxInputSplitThreads = inputSplitPathList.size();
        int numThreads = Math.min(getConfiguration().getNumInputSplitsThreads(), maxInputSplitThreads);
        if (LOG.isInfoEnabled()) {
            LOG.info("loadInputSplits: Using " + numThreads + " thread(s), " + "originally "
                    + getConfiguration().getNumInputSplitsThreads() + " threads(s) for " + inputSplitPathList.size()
                    + " total splits.");
        }

        List<Integer> results = ProgressableUtils.getResultsWithNCallables(mappingInputSplitsCallableFactory,
                numThreads, "load-mapping-%d", getContext());
        for (Integer result : results) {
            entriesLoaded += result;
        }
        // after all threads finish loading - call postFilling
        localData.getMappingStore().postFilling();
        return entriesLoaded;
    }

    /**
     * Load the vertices from the user-defined
     * {@link org.apache.giraph.io.VertexReader}
     *
     * @return Count of vertices and edges loaded
     */
    private VertexEdgeCount loadVertices() throws KeeperException, InterruptedException {
        List<String> inputSplitPathList = getZkExt().getChildrenExt(vertexInputSplitsPaths.getPath(), false, false,
                true);

        InputSplitPathOrganizer splitOrganizer = new InputSplitPathOrganizer(getZkExt(), inputSplitPathList,
                getWorkerInfo().getHostname(), getConfiguration().useInputSplitLocality());
        InputSplitsHandler splitsHandler = new InputSplitsHandler(splitOrganizer, getZkExt(), getContext(),
                BspService.VERTEX_INPUT_SPLIT_RESERVED_NODE, BspService.VERTEX_INPUT_SPLIT_FINISHED_NODE);

        VertexInputSplitsCallableFactory<I, V, E> inputSplitsCallableFactory = new VertexInputSplitsCallableFactory<I, V, E>(
                getConfiguration().createWrappedVertexInputFormat(), getContext(), getConfiguration(), this,
                splitsHandler, getZkExt());

        return loadInputSplits(inputSplitPathList, inputSplitsCallableFactory);
    }

    /**
     * Load the edges from the user-defined
     * {@link org.apache.giraph.io.EdgeReader}.
     *
     * @return Number of edges loaded
     */
    private long loadEdges() throws KeeperException, InterruptedException {
        List<String> inputSplitPathList = getZkExt().getChildrenExt(edgeInputSplitsPaths.getPath(), false, false,
                true);

        InputSplitPathOrganizer splitOrganizer = new InputSplitPathOrganizer(getZkExt(), inputSplitPathList,
                getWorkerInfo().getHostname(), getConfiguration().useInputSplitLocality());
        InputSplitsHandler splitsHandler = new InputSplitsHandler(splitOrganizer, getZkExt(), getContext(),
                BspService.EDGE_INPUT_SPLIT_RESERVED_NODE, BspService.EDGE_INPUT_SPLIT_FINISHED_NODE);

        EdgeInputSplitsCallableFactory<I, V, E> inputSplitsCallableFactory = new EdgeInputSplitsCallableFactory<I, V, E>(
                getConfiguration().createWrappedEdgeInputFormat(), getContext(), getConfiguration(), this,
                splitsHandler, getZkExt());

        return loadInputSplits(inputSplitPathList, inputSplitsCallableFactory).getEdgeCount();
    }

    @Override
    public MasterInfo getMasterInfo() {
        return masterInfo;
    }

    @Override
    public List<WorkerInfo> getWorkerInfoList() {
        return workerInfoList;
    }

    /**
     * Ensure the input splits are ready for processing
     *
     * @param inputSplitPaths Input split paths
     * @param inputSplitEvents Input split events
     */
    private void ensureInputSplitsReady(InputSplitPaths inputSplitPaths, InputSplitEvents inputSplitEvents) {
        while (true) {
            Stat inputSplitsReadyStat;
            try {
                inputSplitsReadyStat = getZkExt().exists(inputSplitPaths.getAllReadyPath(), true);
            } catch (KeeperException e) {
                throw new IllegalStateException(
                        "ensureInputSplitsReady: " + "KeeperException waiting on input splits", e);
            } catch (InterruptedException e) {
                throw new IllegalStateException(
                        "ensureInputSplitsReady: " + "InterruptedException waiting on input splits", e);
            }
            if (inputSplitsReadyStat != null) {
                break;
            }
            inputSplitEvents.getAllReadyChanged().waitForever();
            inputSplitEvents.getAllReadyChanged().reset();
        }
    }

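    /**
     * Mark this source as done for the current mutation phase by creating a
     * per-source znode, then wait until the mutation "all done" znode for
     * this superstep exists.
     */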
    private void markCurrentSourceDoneThenWaitForOthers() {
        String finishedSourcePath = mutationSplitPaths.getDonePath(getApplicationAttempt(), getSuperstep()) + "/"
                + getHostnamePartitionId();
        try {
            getZkExt().createExt(finishedSourcePath, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT, true);
        } catch (KeeperException.NodeExistsException e) {
            LOG.warn("finishSuperstep: finished source path " + finishedSourcePath + " already exists!");
        } catch (KeeperException e) {
            throw new IllegalStateException("Creating " + finishedSourcePath + " failed with KeeperException", e);
        } catch (InterruptedException e) {
            throw new IllegalStateException("Creating " + finishedSourcePath + " failed with InterruptedException",
                    e);
        }

        while (true) {
            Stat inputSplitsDoneStat;
            try {
                inputSplitsDoneStat = getZkExt()
                        .exists(mutationSplitPaths.getAllDonePath(getApplicationAttempt(), getSuperstep()), true);
            } catch (KeeperException e) {
                throw new IllegalStateException("markCurrentWorkerDoneThenWaitForOthers: "
                        + "KeeperException waiting on worker done splits", e);
            } catch (InterruptedException e) {
                throw new IllegalStateException("markCurrentWorkerDoneThenWaitForOthers: "
                        + "InterruptedException waiting on worker done splits", e);
            }
            if (inputSplitsDoneStat != null) {
                break;
            }
            getMutationInputSplitsEvents().getAllDoneChanged().waitForever();
            getMutationInputSplitsEvents().getAllDoneChanged().reset();
        }
    }

    /**
     * Mark current worker as done and then wait for all workers
     * to finish processing input splits.
     *
     * @param inputSplitPaths Input split paths
     * @param inputSplitEvents Input split events
     */
    private void markCurrentWorkerDoneThenWaitForOthers(InputSplitPaths inputSplitPaths,
            InputSplitEvents inputSplitEvents) {
        String workerInputSplitsDonePath = inputSplitPaths.getDonePath() + "/" + getWorkerInfo().getHostnameId();
        try {
            getZkExt().createExt(workerInputSplitsDonePath, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT, true);
        } catch (KeeperException e) {
            throw new IllegalStateException(
                    "markCurrentWorkerDoneThenWaitForOthers: " + "KeeperException creating worker done splits", e);
        } catch (InterruptedException e) {
            throw new IllegalStateException(
                    "markCurrentWorkerDoneThenWaitForOthers: " + "InterruptedException creating worker done splits",
                    e);
        }
        while (true) {
            Stat inputSplitsDoneStat;
            try {
                inputSplitsDoneStat = getZkExt().exists(inputSplitPaths.getAllDonePath(), true);
            } catch (KeeperException e) {
                throw new IllegalStateException("markCurrentWorkerDoneThenWaitForOthers: "
                        + "KeeperException waiting on worker done splits", e);
            } catch (InterruptedException e) {
                throw new IllegalStateException("markCurrentWorkerDoneThenWaitForOthers: "
                        + "InterruptedException waiting on worker done splits", e);
            }
            if (inputSplitsDoneStat != null) {
                break;
            }
            inputSplitEvents.getAllDoneChanged().waitForever();
            inputSplitEvents.getAllDoneChanged().reset();
        }
    }

    @Override
    public FinishedSuperstepStats setup() {
        // Unless doing a restart, prepare for computation:
        // 1. Start superstep INPUT_SUPERSTEP (no computation)
        // 2. Wait until the INPUT_SPLIT_ALL_READY_PATH node has been created
        // 3. Process input splits until there are no more.
        // 4. Wait until the INPUT_SPLIT_ALL_DONE_PATH node has been created
        // 5. Process any mutations deriving from add edge requests
        // 6. Wait for superstep INPUT_SUPERSTEP to complete.
        if (getRestartedSuperstep() != UNSET_SUPERSTEP) {
            setCachedSuperstep(getRestartedSuperstep());
            return new FinishedSuperstepStats(0, false, 0, 0, true, CheckpointStatus.NONE);
        }

        JSONObject jobState = getJobState();
        if (jobState != null) {
            try {
                if ((ApplicationState
                        .valueOf(jobState.getString(JSONOBJ_STATE_KEY)) == ApplicationState.START_SUPERSTEP)
                        && jobState.getLong(JSONOBJ_SUPERSTEP_KEY) == getSuperstep()) {
                    if (LOG.isInfoEnabled()) {
                        LOG.info("setup: Restarting from an automated " + "checkpointed superstep " + getSuperstep()
                                + ", attempt " + getApplicationAttempt());
                    }
                    setRestartedSuperstep(getSuperstep());
                    return new FinishedSuperstepStats(0, false, 0, 0, true, CheckpointStatus.NONE);
                }
            } catch (JSONException e) {
                throw new RuntimeException("setup: Failed to get key-values from " + jobState.toString(), e);
            }
        }
        //        registerHealth(getSuperstep());
        //        Collection<? extends PartitionOwner> masterSetPartitionOwners = null;

        // NOTE: this block is disabled (the condition is constant false), so
        // this source variant skips the normal input-loading phase and
        // setup() falls through to return null.
        if (false) {
            // Add the partitions that this worker owns
            Collection<? extends PartitionOwner> masterSetPartitionOwners = startSuperstep();
            workerGraphPartitioner.updatePartitionOwners(getWorkerInfo(), masterSetPartitionOwners);

            workerClient.setup(getConfiguration().authenticate());

            // Initialize aggregator at worker side during setup.
            // Do this just before vertex and edge loading.
            globalCommHandler.prepareSuperstep(workerAggregatorRequestProcessor);

            VertexEdgeCount vertexEdgeCount;
            long entriesLoaded;

            if (getConfiguration().hasMappingInputFormat()) {
                // Ensure the mapping InputSplits are ready for processing
                ensureInputSplitsReady(mappingInputSplitsPaths, mappingInputSplitsEvents);
                getContext().progress();
                try {
                    entriesLoaded = loadMapping();
                    // successfully loaded mapping
                    // now initialize graphPartitionerFactory with this data
                    getGraphPartitionerFactory().initialize(localData);
                } catch (InterruptedException e) {
                    throw new IllegalStateException("setup: loadMapping failed with InterruptedException", e);
                } catch (KeeperException e) {
                    throw new IllegalStateException("setup: loadMapping failed with KeeperException", e);
                }
                getContext().progress();
                if (LOG.isInfoEnabled()) {
                    LOG.info("setup: Finally loaded a total of " + entriesLoaded + " entries from inputSplits");
                }

                // Workers wait for each other to finish, coordinated by master
                markCurrentWorkerDoneThenWaitForOthers(mappingInputSplitsPaths, mappingInputSplitsEvents);
                // Print stats for data stored in localData once mapping is fully
                // loaded on all the workers
                localData.printStats();
            }

            if (getConfiguration().hasVertexInputFormat()) {
                // Ensure the vertex InputSplits are ready for processing
                ensureInputSplitsReady(vertexInputSplitsPaths, vertexInputSplitsEvents);
                getContext().progress();
                try {
                    vertexEdgeCount = loadVertices();
                } catch (InterruptedException e) {
                    throw new IllegalStateException("setup: loadVertices failed with InterruptedException", e);
                } catch (KeeperException e) {
                    throw new IllegalStateException("setup: loadVertices failed with KeeperException", e);
                }
                getContext().progress();
            } else {
                vertexEdgeCount = new VertexEdgeCount();
            }
            WorkerProgress.get().finishLoadingVertices();

            if (getConfiguration().hasEdgeInputFormat()) {
                // Ensure the edge InputSplits are ready for processing
                ensureInputSplitsReady(edgeInputSplitsPaths, edgeInputSplitsEvents);
                getContext().progress();
                try {
                    vertexEdgeCount = vertexEdgeCount.incrVertexEdgeCount(0, loadEdges());
                } catch (InterruptedException e) {
                    throw new IllegalStateException("setup: loadEdges failed with InterruptedException", e);
                } catch (KeeperException e) {
                    throw new IllegalStateException("setup: loadEdges failed with KeeperException", e);
                }
                getContext().progress();
            }
            WorkerProgress.get().finishLoadingEdges();

            if (LOG.isInfoEnabled()) {
                LOG.info("setup: Finally loaded a total of " + vertexEdgeCount);
            }

            if (getConfiguration().hasVertexInputFormat()) {
                // Workers wait for each other to finish, coordinated by master
                markCurrentWorkerDoneThenWaitForOthers(vertexInputSplitsPaths, vertexInputSplitsEvents);
            }

            if (getConfiguration().hasEdgeInputFormat()) {
                // Workers wait for each other to finish, coordinated by master
                markCurrentWorkerDoneThenWaitForOthers(edgeInputSplitsPaths, edgeInputSplitsEvents);
            }

            // Create remaining partitions owned by this worker.
            for (PartitionOwner partitionOwner : masterSetPartitionOwners) {
                if (partitionOwner.getWorkerInfo().equals(getWorkerInfo())
                        && !getPartitionStore().hasPartition(partitionOwner.getPartitionId())) {
                    Partition<I, V, E> partition = getConfiguration()
                            .createPartition(partitionOwner.getPartitionId(), getContext());
                    getPartitionStore().addPartition(partition);
                }
            }

            // remove mapping store if possible
            localData.removeMappingStoreIfPossible();

            if (getConfiguration().hasEdgeInputFormat()) {
                // Move edges from temporary storage to their source vertices.
                getServerData().getEdgeStore().moveEdgesToVertices();
            }

            // Generate the partition stats for the input superstep and process
            // if necessary
            List<PartitionStats> partitionStatsList = new ArrayList<PartitionStats>();
            for (Integer partitionId : getPartitionStore().getPartitionIds()) {
                Partition<I, V, E> partition = getPartitionStore().getOrCreatePartition(partitionId);
                PartitionStats partitionStats = new PartitionStats(partition.getId(), partition.getVertexCount(), 0,
                        partition.getEdgeCount(), 0, 0);
                partitionStatsList.add(partitionStats);
                getPartitionStore().putPartition(partition);
            }
            workerGraphPartitioner.finalizePartitionStats(partitionStatsList, getPartitionStore());

            return finishSuperstep(partitionStatsList, null);
        }
        return null;
    }

    /**
     * Register the health of this worker for a given superstep
     *
     * @param superstep Superstep to register health on
     */
    private void registerHealth(long superstep) {
        JSONArray hostnamePort = new JSONArray();
        hostnamePort.put(getHostname());

        hostnamePort.put(workerInfo.getPort());

        String myHealthPath = null;
        if (isHealthy()) {
            myHealthPath = getSourceInfoHealthyPath(getApplicationAttempt(), getSuperstep());
        } else {
            myHealthPath = getSourceInfoUnhealthyPath(getApplicationAttempt(), getSuperstep());
        }
        myHealthPath = myHealthPath + "/" + workerInfo.getHostnameId();
        try {
            myHealthZnode = getZkExt().createExt(myHealthPath, WritableUtils.writeToByteArray(workerInfo),
                    Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, true);
        } catch (KeeperException.NodeExistsException e) {
            LOG.warn("registerHealth-" + this.getGraphTaskManager().getPartitionID()
                    + ": myHealthPath already exists (likely " + "from previous failure): " + myHealthPath
                    + ".  Waiting for change in attempts " + "to re-join the application");
            getApplicationAttemptChangedEvent().waitForever();
            if (LOG.isInfoEnabled()) {
                LOG.info("registerHealth-" + this.getGraphTaskManager().getPartitionID() + ": Got application "
                        + "attempt changed event, killing self");
            }
            throw new IllegalStateException("registerHealth-" + this.getGraphTaskManager().getPartitionID()
                    + ": Trying " + "to get the new application attempt by killing self", e);
        } catch (KeeperException e) {
            throw new IllegalStateException("Creating " + myHealthPath + " failed with KeeperException", e);
        } catch (InterruptedException e) {
            throw new IllegalStateException("Creating " + myHealthPath + " failed with InterruptedException", e);
        }
        if (LOG.isInfoEnabled()) {
            LOG.info("registerHealth-" + this.getGraphTaskManager().getPartitionID()
                    + ": Created my health node for attempt=" + getApplicationAttempt() + ", superstep="
                    + getSuperstep() + " with " + myHealthZnode + " and workerInfo= " + workerInfo);
        }
    }

    /**
     * Unregister this worker's health znode so the master learns of the
     * failure more quickly.
     */
    private void unregisterHealth() {
        LOG.error("unregisterHealth-" + this.getGraphTaskManager().getPartitionID()
                + ": Got failure, unregistering health on " + myHealthZnode + " on superstep " + getSuperstep());
        try {
            getZkExt().deleteExt(myHealthZnode, -1, false);
        } catch (InterruptedException e) {
            throw new IllegalStateException("unregisterHealth-" + this.getGraphTaskManager().getPartitionID()
                    + ": InterruptedException - Couldn't delete " + myHealthZnode, e);
        } catch (KeeperException e) {
            throw new IllegalStateException("unregisterHealth-" + this.getGraphTaskManager().getPartitionID()
                    + ": KeeperException - Couldn't delete " + myHealthZnode, e);
        }
    }

    @Override
    public void failureCleanup() {
        unregisterHealth();
    }

    @Override
    public Collection<? extends PartitionOwner> startSuperstep() {
        // Algorithm:
        // 1. Communication service will combine message from previous
        //    superstep
        // 2. Register my health for the next superstep.
        // 3. Wait until the partition assignment is complete and get it
        // 4. Get the aggregator values from the previous superstep
        if (getSuperstep() != INPUT_SUPERSTEP) {
            workerServer.prepareSuperstep();
        }

        // ---- Mutation planning phase ----
        // Register health, wait for the master to signal the start of the
        // mutation phase, pause briefly, then mark this source as done and
        // wait for the other sources.
        registerHealth(getSuperstep());
        waitForMutationStart();
        try {
            Thread.sleep(8000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag instead of swallowing the exception.
            Thread.currentThread().interrupt();
            LOG.warn("startSuperstep: interrupted while waiting in the mutation phase", e);
        }
        markCurrentSourceDoneThenWaitForOthers();
        // ---------------------------------

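        // Wait until the master has published the worker addresses and
        // partition assignments for this superstep, then read them from the
        // corresponding znode.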
        String addressesAndPartitionsPath = getAddressesAndPartitionsPath(getApplicationAttempt(), getSuperstep());
        AddressesAndPartitionsWritable addressesAndPartitions = new AddressesAndPartitionsWritable(
                workerGraphPartitioner.createPartitionOwner().getClass());
        try {
            while (getZkExt().exists(addressesAndPartitionsPath, true) == null) {
                getAddressesAndPartitionsReadyChangedEvent().waitForever();
                getAddressesAndPartitionsReadyChangedEvent().reset();
            }
            WritableUtils.readFieldsFromZnode(getZkExt(), addressesAndPartitionsPath, false, null,
                    addressesAndPartitions);
        } catch (KeeperException e) {
            throw new IllegalStateException("startSuperstep: KeeperException getting assignments", e);
        } catch (InterruptedException e) {
            throw new IllegalStateException("startSuperstep: InterruptedException getting assignments", e);
        }

        workerInfoList.clear();
        workerInfoList = addressesAndPartitions.getWorkerInfos();
        masterInfo = addressesAndPartitions.getMasterInfo();

        if (LOG.isInfoEnabled()) {
            LOG.info("startSuperstep: " + masterInfo);
            LOG.info("startSuperstep: Ready for computation on superstep " + getSuperstep() + " since worker "
                    + "selection and vertex range assignments are done in " + addressesAndPartitionsPath);
        }

        getContext().setStatus("startSuperstep: " + getGraphTaskManager().getGraphFunctions().toString()
                + " - Attempt=" + getApplicationAttempt() + ", Superstep=" + getSuperstep());

        if (LOG.isDebugEnabled()) {
            LOG.debug("startSuperstep: addressesAndPartitions" + addressesAndPartitions.getWorkerInfos());
            for (PartitionOwner partitionOwner : addressesAndPartitions.getPartitionOwners()) {
                LOG.debug(partitionOwner.getPartitionId() + " " + partitionOwner.getWorkerInfo());
            }
        }

        return addressesAndPartitions.getPartitionOwners();
    }

    @Override
    public FinishedSuperstepStats finishSuperstep(List<PartitionStats> partitionStatsList,
            GiraphTimerContext superstepTimerContext) {
        // This barrier blocks until success (or the master signals it to
        // restart).
        //
        // Master will coordinate the barriers and aggregate "doneness" of all
        // the vertices.  Each worker will:
        // 1. Ensure that the requests are complete
        // 2. Execute user postSuperstep() if necessary.
        // 3. Save aggregator values that are in use.
        // 4. Report the statistics (vertices, edges, messages, etc.)
        //    of this worker
        // 5. Let the master know it is finished.
        // 6. Wait for the master's superstep info, and check if done
        waitForRequestsToFinish();

        getGraphTaskManager().notifyFinishedCommunication();

        long workerSentMessages = 0;
        long workerSentMessageBytes = 0;
        long localVertices = 0;
        for (PartitionStats partitionStats : partitionStatsList) {
            workerSentMessages += partitionStats.getMessagesSentCount();
            workerSentMessageBytes += partitionStats.getMessageBytesSentCount();
            localVertices += partitionStats.getVertexCount();
        }

        if (getSuperstep() != INPUT_SUPERSTEP) {
            postSuperstepCallbacks();
        }

        globalCommHandler.finishSuperstep(workerAggregatorRequestProcessor);

        MessageStore<I, Writable> incomingMessageStore = getServerData().getIncomingMessageStore();
        if (incomingMessageStore instanceof AsyncMessageStoreWrapper) {
            ((AsyncMessageStoreWrapper) incomingMessageStore).waitToComplete();
        }

        if (LOG.isInfoEnabled()) {
            LOG.info("finishSuperstep: Superstep " + getSuperstep() + ", messages = " + workerSentMessages + " "
                    + ", message bytes = " + workerSentMessageBytes + " , " + MemoryUtils.getRuntimeMemoryStats());
        }

        if (superstepTimerContext != null) {
            superstepTimerContext.stop();
        }
        writeFinshedSuperstepInfoToZK(partitionStatsList, workerSentMessages, workerSentMessageBytes);

        LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                "finishSuperstep: (waiting for rest " + "of workers) "
                        + getGraphTaskManager().getGraphFunctions().toString() + " - Attempt="
                        + getApplicationAttempt() + ", Superstep=" + getSuperstep());

        String superstepFinishedNode = getSuperstepFinishedPath(getApplicationAttempt(), getSuperstep());

        waitForOtherWorkers(superstepFinishedNode);

        GlobalStats globalStats = new GlobalStats();
        SuperstepClasses superstepClasses = new SuperstepClasses();
        WritableUtils.readFieldsFromZnode(getZkExt(), superstepFinishedNode, false, null, globalStats,
                superstepClasses);
        if (LOG.isInfoEnabled()) {
            LOG.info("finishSuperstep: Completed superstep " + getSuperstep() + " with global stats " + globalStats
                    + " and classes " + superstepClasses);
        }
        getContext().setStatus(
                "finishSuperstep: (all workers done) " + getGraphTaskManager().getGraphFunctions().toString()
                        + " - Attempt=" + getApplicationAttempt() + ", Superstep=" + getSuperstep());
        incrCachedSuperstep();
        getConfiguration().updateSuperstepClasses(superstepClasses);

        return new FinishedSuperstepStats(localVertices, globalStats.getHaltComputation(),
                globalStats.getVertexCount(), globalStats.getEdgeCount(), false, globalStats.getCheckpointStatus());
    }

    /**
     * Handle post-superstep callbacks
     */
    private void postSuperstepCallbacks() {
        GiraphTimerContext timerContext = wcPostSuperstepTimer.time();
        getWorkerContext().postSuperstep();
        timerContext.stop();
        getContext().progress();

        for (WorkerObserver obs : getWorkerObservers()) {
            obs.postSuperstep(getSuperstep());
            getContext().progress();
        }
    }

    /**
     * Wait for all the requests to finish.
     */
    private void waitForRequestsToFinish() {
        if (LOG.isInfoEnabled()) {
            LOG.info("finishSuperstep: Waiting on all requests, superstep " + getSuperstep() + " "
                    + MemoryUtils.getRuntimeMemoryStats());
        }
        GiraphTimerContext timerContext = waitRequestsTimer.time();
        workerClient.waitAllRequests();
        timerContext.stop();
    }

    /**
     * Wait for all the other Workers to finish the superstep.
     *
     * @param superstepFinishedNode ZooKeeper path to wait on.
     */
    private void waitForOtherWorkers(String superstepFinishedNode) {
        try {
            while (getZkExt().exists(superstepFinishedNode, true) == null) {
                getSuperstepFinishedEvent().waitForever();
                getSuperstepFinishedEvent().reset();
            }
        } catch (KeeperException e) {
            throw new IllegalStateException("finishSuperstep: Failed while waiting for master to "
                    + "signal completion of superstep " + getSuperstep(), e);
        } catch (InterruptedException e) {
            throw new IllegalStateException("finishSuperstep: Failed while waiting for master to "
                    + "signal completion of superstep " + getSuperstep(), e);
        }
    }

    /**
     * Wait for the master to signal the start of the mutation phase by
     * creating the mutation-start znode for the current superstep.
     */
    private void waitForMutationStart() {
        String mutationStartPath = getMutationStartPath(getApplicationAttempt(), getSuperstep());
        try {
            while (getZkExt().exists(mutationStartPath, true) == null) {
                getMutationStartCreatedEvent().waitForever();
                getMutationStartCreatedEvent().reset();
            }
        } catch (KeeperException e) {
            throw new IllegalStateException("finishMutationSplits: Failed while waiting for master to "
                    + "signal completion of MutationSplits " + getSuperstep(), e);
        } catch (InterruptedException e) {
            throw new IllegalStateException("finishMutationSplits: Failed while waiting for master to "
                    + "signal completion of mutationSplits " + getSuperstep(), e);
        }
    }

    //    private void endMutationSplits() {
    //        // Let the sources know they can start sending splits
    //        String mutationSplitsDoneDirPath = getMutationSplitsDoneDirPath(getApplicationAttempt(),getSuperstep());
    //        try {
    //            getZkExt().createExt(mutationSplitsDoneDirPath,
    //                    null,
    //                    Ids.OPEN_ACL_UNSAFE,
    //                    CreateMode.PERSISTENT,
    //                    false);
    //        } catch (KeeperException.NodeExistsException e) {
    //            LOG.info("startMutation" + ": Node " +
    //                    mutationStartPath + " already exists.");
    //        } catch (KeeperException e) {
    //            throw new IllegalStateException("startMutation" + ": KeeperException", e);
    //        } catch (InterruptedException e) {
    //            throw new IllegalStateException("startMutation" + ": IllegalStateException", e);
    //        }
    //        return true;
    //    }

    /**
     * Write finished superstep info to ZooKeeper.
     *
     * @param partitionStatsList List of partition stats from superstep.
     * @param workerSentMessages Number of messages sent in superstep.
     * @param workerSentMessageBytes Number of message bytes sent
     *                               in superstep.
     */
    private void writeFinshedSuperstepInfoToZK(List<PartitionStats> partitionStatsList, long workerSentMessages,
            long workerSentMessageBytes) {
        Collection<PartitionStats> finalizedPartitionStats = workerGraphPartitioner
                .finalizePartitionStats(partitionStatsList, getPartitionStore());
        List<PartitionStats> finalizedPartitionStatsList = new ArrayList<PartitionStats>(finalizedPartitionStats);
        byte[] partitionStatsBytes = WritableUtils.writeListToByteArray(finalizedPartitionStatsList);
        WorkerSuperstepMetrics metrics = new WorkerSuperstepMetrics();
        metrics.readFromRegistry();
        byte[] metricsBytes = WritableUtils.writeToByteArray(metrics);

        JSONObject workerFinishedInfoObj = new JSONObject();
        try {
            workerFinishedInfoObj.put(JSONOBJ_PARTITION_STATS_KEY, Base64.encodeBytes(partitionStatsBytes));
            workerFinishedInfoObj.put(JSONOBJ_NUM_MESSAGES_KEY, workerSentMessages);
            workerFinishedInfoObj.put(JSONOBJ_NUM_MESSAGE_BYTES_KEY, workerSentMessageBytes);
            workerFinishedInfoObj.put(JSONOBJ_METRICS_KEY, Base64.encodeBytes(metricsBytes));
        } catch (JSONException e) {
            throw new RuntimeException(e);
        }

        String finishedWorkerPath = getWorkerFinishedPath(getApplicationAttempt(), getSuperstep()) + "/"
                + getHostnamePartitionId();
        try {
            getZkExt().createExt(finishedWorkerPath,
                    workerFinishedInfoObj.toString().getBytes(Charset.defaultCharset()), Ids.OPEN_ACL_UNSAFE,
                    CreateMode.PERSISTENT, true);
        } catch (KeeperException.NodeExistsException e) {
            LOG.warn("finishSuperstep: finished worker path " + finishedWorkerPath + " already exists!");
        } catch (KeeperException e) {
            throw new IllegalStateException("Creating " + finishedWorkerPath + " failed with KeeperException", e);
        } catch (InterruptedException e) {
            throw new IllegalStateException("Creating " + finishedWorkerPath + " failed with InterruptedException",
                    e);
        }
    }

    /**
     * Save the vertices of all local partitions using the user-defined
     * VertexOutputFormat.
     *
     * @param numLocalVertices Number of local vertices
     * @throws InterruptedException
     */
    private void saveVertices(long numLocalVertices) throws IOException, InterruptedException {
        ImmutableClassesGiraphConfiguration<I, V, E> conf = getConfiguration();

        if (conf.getVertexOutputFormatClass() == null) {
            LOG.warn("saveVertices: " + GiraphConstants.VERTEX_OUTPUT_FORMAT_CLASS
                    + " not specified -- there will be no saved output");
            return;
        }
        if (conf.doOutputDuringComputation()) {
            if (LOG.isInfoEnabled()) {
                LOG.info("saveVertices: The option for doing output during "
                        + "computation is selected, so there will be no saving of the "
                        + "output in the end of application");
            }
            return;
        }

        final int numPartitions = getPartitionStore().getNumPartitions();
        int numThreads = Math.min(getConfiguration().getNumOutputThreads(), numPartitions);
        LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveVertices: Starting to save "
                + numLocalVertices + " vertices " + "using " + numThreads + " threads");
        final VertexOutputFormat<I, V, E> vertexOutputFormat = getConfiguration().createWrappedVertexOutputFormat();

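        // Shared queue of partition ids that the output threads will drain
        // (ArrayBlockingQueue needs a positive capacity, hence the LinkedList
        // fallback when there are no partitions).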
        final Queue<Integer> partitionIdQueue = (numPartitions == 0) ? new LinkedList<Integer>()
                : new ArrayBlockingQueue<Integer>(numPartitions);
        Iterables.addAll(partitionIdQueue, getPartitionStore().getPartitionIds());

        long verticesToStore = 0;
        PartitionStore<I, V, E> partitionStore = getPartitionStore();
        for (int partitionId : partitionStore.getPartitionIds()) {
            Partition<I, V, E> partition = partitionStore.getOrCreatePartition(partitionId);
            verticesToStore += partition.getVertexCount();
            partitionStore.putPartition(partition);
        }
        WorkerProgress.get().startStoring(verticesToStore, getPartitionStore().getNumPartitions());

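        // Each callable runs on its own thread with its own VertexWriter and
        // keeps taking partition ids from the shared queue until it is empty.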
        CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
            @Override
            public Callable<Void> newCallable(int callableId) {
                return new Callable<Void>() {
                    /** How often to update WorkerProgress */
                    private static final long VERTICES_TO_UPDATE_PROGRESS = 100000;

                    @Override
                    public Void call() throws Exception {
                        VertexWriter<I, V, E> vertexWriter = vertexOutputFormat.createVertexWriter(getContext());
                        vertexWriter.setConf(getConfiguration());
                        vertexWriter.initialize(getContext());
                        long nextPrintVertices = 0;
                        long nextUpdateProgressVertices = VERTICES_TO_UPDATE_PROGRESS;
                        long nextPrintMsecs = System.currentTimeMillis() + 15000;
                        int partitionIndex = 0;
                        int numPartitions = getPartitionStore().getNumPartitions();
                        while (!partitionIdQueue.isEmpty()) {
                            Integer partitionId = partitionIdQueue.poll();
                            if (partitionId == null) {
                                break;
                            }

                            Partition<I, V, E> partition = getPartitionStore().getOrCreatePartition(partitionId);
                            long verticesWritten = 0;
                            for (Vertex<I, V, E> vertex : partition) {
                                vertexWriter.writeVertex(vertex);
                                ++verticesWritten;

                                // Update status at most every 250k vertices or 15 seconds
                                if (verticesWritten > nextPrintVertices
                                        && System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                            "saveVertices: Saved " + verticesWritten + " out of "
                                                    + partition.getVertexCount() + " partition vertices, "
                                                    + "on partition " + partitionIndex + " out of "
                                                    + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = verticesWritten + 250000;
                                }

                                if (verticesWritten >= nextUpdateProgressVertices) {
                                    WorkerProgress.get().addVerticesStored(VERTICES_TO_UPDATE_PROGRESS);
                                    nextUpdateProgressVertices += VERTICES_TO_UPDATE_PROGRESS;
                                }
                            }
                            getPartitionStore().putPartition(partition);
                            ++partitionIndex;
                            WorkerProgress.get().addVerticesStored(verticesWritten % VERTICES_TO_UPDATE_PROGRESS);
                            WorkerProgress.get().incrementPartitionsStored();
                        }
                        vertexWriter.close(getContext()); // the temp results are saved now
                        return null;
                    }
                };
            }
        };
        ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "save-vertices-%d", getContext());

        LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveVertices: Done saving vertices.");
        // YARN: we must commit the "task" output ourselves, since Hadoop isn't there to do it.
        if (getConfiguration().isPureYarnJob() && getConfiguration().getVertexOutputFormatClass() != null) {
            try {
                OutputCommitter outputCommitter = vertexOutputFormat.getOutputCommitter(getContext());
                if (outputCommitter.needsTaskCommit(getContext())) {
                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                            "OutputCommitter: committing task output.");
                    // transfer from temp dirs to "task commit" dirs to prep for
                    // the master's OutputCommitter#commitJob(context) call to finish.
                    outputCommitter.commitTask(getContext());
                }
            } catch (InterruptedException ie) {
                LOG.error("Interrupted while attempting to obtain " + "OutputCommitter.", ie);
            } catch (IOException ioe) {
                LOG.error("Master task's attempt to commit output has " + "FAILED.", ioe);
            }
        }
    }

    /**
     * Save the edges of all local partitions using the user-defined
     * EdgeOutputFormat.
     *
     * @throws InterruptedException
     */
    private void saveEdges() throws IOException, InterruptedException {
        final ImmutableClassesGiraphConfiguration<I, V, E> conf = getConfiguration();

        if (conf.getEdgeOutputFormatClass() == null) {
            LOG.warn("saveEdges: " + GiraphConstants.EDGE_OUTPUT_FORMAT_CLASS
                    + "Make sure that the EdgeOutputFormat is not required.");
            return;
        }

        final int numPartitions = getPartitionStore().getNumPartitions();
        int numThreads = Math.min(conf.getNumOutputThreads(), numPartitions);
        LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                "saveEdges: Starting to save the edges using " + numThreads + " threads");
        final EdgeOutputFormat<I, V, E> edgeOutputFormat = conf.createWrappedEdgeOutputFormat();

        final Queue<Integer> partitionIdQueue = (numPartitions == 0) ? new LinkedList<Integer>()
                : new ArrayBlockingQueue<Integer>(numPartitions);
        Iterables.addAll(partitionIdQueue, getPartitionStore().getPartitionIds());

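        // As with saveVertices(), each callable uses its own EdgeWriter and
        // drains partition ids from the shared queue.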
        CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
            @Override
            public Callable<Void> newCallable(int callableId) {
                return new Callable<Void>() {
                    @Override
                    public Void call() throws Exception {
                        EdgeWriter<I, V, E> edgeWriter = edgeOutputFormat.createEdgeWriter(getContext());
                        edgeWriter.setConf(conf);
                        edgeWriter.initialize(getContext());

                        long nextPrintVertices = 0;
                        long nextPrintMsecs = System.currentTimeMillis() + 15000;
                        int partitionIndex = 0;
                        int numPartitions = getPartitionStore().getNumPartitions();
                        while (!partitionIdQueue.isEmpty()) {
                            Integer partitionId = partitionIdQueue.poll();
                            if (partitionId == null) {
                                break;
                            }

                            Partition<I, V, E> partition = getPartitionStore().getOrCreatePartition(partitionId);
                            long vertices = 0;
                            long edges = 0;
                            long partitionEdgeCount = partition.getEdgeCount();
                            for (Vertex<I, V, E> vertex : partition) {
                                for (Edge<I, E> edge : vertex.getEdges()) {
                                    edgeWriter.writeEdge(vertex.getId(), vertex.getValue(), edge);
                                    ++edges;
                                }
                                ++vertices;

                                // Update status at most every 250k vertices or 15 seconds
                                if (vertices > nextPrintVertices && System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                            "saveEdges: Saved " + edges + " edges out of " + partitionEdgeCount
                                                    + " partition edges, on partition " + partitionIndex
                                                    + " out of " + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = vertices + 250000;
                                }
                            }
                            getPartitionStore().putPartition(partition);
                            ++partitionIndex;
                        }
                        edgeWriter.close(getContext()); // the temp results are saved now
                        return null;
                    }
                };
            }
        };
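        // Run one callable per output thread and block until all of them
        // finish, reporting progress to the task context while waiting.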
        ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "save-vertices-%d", getContext());

        LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveEdges: Done saving edges.");
        // YARN: must complete the commit of the "task" output ourselves, since Hadoop isn't there to do it.
        if (conf.isPureYarnJob() && conf.getEdgeOutputFormatClass() != null) {
            try {
                OutputCommitter outputCommitter = edgeOutputFormat.getOutputCommitter(getContext());
                if (outputCommitter.needsTaskCommit(getContext())) {
                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                            "OutputCommitter: committing task output.");
                    // transfer from temp dirs to "task commit" dirs to prep for
                    // the master's OutputCommitter#commitJob(context) call to finish.
                    outputCommitter.commitTask(getContext());
                }
            } catch (InterruptedException ie) {
                LOG.error("Interrupted while attempting to obtain " + "OutputCommitter.", ie);
            } catch (IOException ioe) {
                LOG.error("Master task's attempt to commit output has " + "FAILED.", ioe);
            }
        }
    }

    @Override
    public void cleanup(FinishedSuperstepStats finishedSuperstepStats) throws IOException, InterruptedException {
        workerClient.closeConnections();
        setCachedSuperstep(getSuperstep() - 1);
        if (finishedSuperstepStats.getCheckpointStatus() != CheckpointStatus.CHECKPOINT_AND_HALT) {
            saveVertices(finishedSuperstepStats.getLocalVertexCount());
            saveEdges();
        }
        WorkerProgress.get().finishStoring();
        if (workerProgressWriter != null) {
            workerProgressWriter.stop();
        }
        getPartitionStore().shutdown();
        // All worker processes should denote they are done by adding special
        // znode.  Once the number of znodes equals the number of partitions
        // for workers and masters, the master will clean up the ZooKeeper
        // znodes associated with this job.
        String workerCleanedUpPath = cleanedUpPath + "/" + getTaskPartition() + WORKER_SUFFIX;
        try {
            String finalFinishedPath = getZkExt().createExt(workerCleanedUpPath, null, Ids.OPEN_ACL_UNSAFE,
                    CreateMode.PERSISTENT, true);
            if (LOG.isInfoEnabled()) {
                LOG.info("cleanup: Notifying master its okay to cleanup with " + finalFinishedPath);
            }
        } catch (KeeperException.NodeExistsException e) {
            if (LOG.isInfoEnabled()) {
                LOG.info("cleanup: Couldn't create finished node '" + workerCleanedUpPath);
            }
        } catch (KeeperException e) {
            // We are already cleaning up, so it is okay if this notification to the master fails.
            LOG.error("cleanup: Got KeeperException on notification " + "to master about cleanup", e);
        } catch (InterruptedException e) {
            // We are already cleaning up, so it is okay if this notification to the master fails.
            LOG.error("cleanup: Got InterruptedException on notification " + "to master about cleanup", e);
        }
        try {
            getZkExt().close();
        } catch (InterruptedException e) {
            // cleanup phase -- just log the error
            LOG.error("cleanup: Zookeeper failed to close with " + e);
        }

        if (getConfiguration().metricsEnabled()) {
            GiraphMetrics.get().dumpToStream(System.err);
        }

        // Preferably would shut down the service only after
        // all clients have disconnected (or the exceptions on the
        // client side ignored).
        workerServer.close();
    }

    @Override
    public void storeCheckpoint() throws IOException {
        LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                "storeCheckpoint: Starting checkpoint " + getGraphTaskManager().getGraphFunctions().toString()
                        + " - Attempt=" + getApplicationAttempt() + ", Superstep=" + getSuperstep());

        // Algorithm:
        // For each partition, dump vertices and messages
        Path metadataFilePath = createCheckpointFilePathSafe(CheckpointingUtils.CHECKPOINT_METADATA_POSTFIX);
        Path validFilePath = createCheckpointFilePathSafe(CheckpointingUtils.CHECKPOINT_VALID_POSTFIX);
        Path checkpointFilePath = createCheckpointFilePathSafe(CheckpointingUtils.CHECKPOINT_DATA_POSTFIX);

        // The metadata file is small: it records how many partitions this
        // worker owns and their ids.
        FSDataOutputStream metadataOutputStream = getFs().create(metadataFilePath);
        metadataOutputStream.writeInt(getPartitionStore().getNumPartitions());

        for (Integer partitionId : getPartitionStore().getPartitionIds()) {
            metadataOutputStream.writeInt(partitionId);
        }
        metadataOutputStream.close();

        storeCheckpointVertices();

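        // The checkpoint data file holds the worker context, followed by the
        // incoming message store of every partition and, finally, the pending
        // worker-to-worker messages.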
        FSDataOutputStream checkpointOutputStream = getFs().create(checkpointFilePath);
        workerContext.write(checkpointOutputStream);
        getContext().progress();

        for (Integer partitionId : getPartitionStore().getPartitionIds()) {
            // write messages
            checkpointOutputStream.writeInt(partitionId);
            getServerData().getCurrentMessageStore().writePartition(checkpointOutputStream, partitionId);
            getContext().progress();

        }

        List<Writable> w2wMessages = getServerData().getCurrentWorkerToWorkerMessages();
        WritableUtils.writeList(w2wMessages, checkpointOutputStream);

        checkpointOutputStream.close();

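        // An empty "valid" marker file signals that this worker's checkpoint
        // files were written completely.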
        getFs().createNewFile(validFilePath);

        // Notify master that checkpoint is stored
        String workerWroteCheckpoint = getWorkerWroteCheckpointPath(getApplicationAttempt(), getSuperstep()) + "/"
                + getHostnamePartitionId();
        try {
            getZkExt().createExt(workerWroteCheckpoint, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT,
                    true);
        } catch (KeeperException.NodeExistsException e) {
            LOG.warn("storeCheckpoint: wrote checkpoint worker path " + workerWroteCheckpoint + " already exists!");
        } catch (KeeperException e) {
            throw new IllegalStateException("Creating " + workerWroteCheckpoint + " failed with KeeperException",
                    e);
        } catch (InterruptedException e) {
            throw new IllegalStateException(
                    "Creating " + workerWroteCheckpoint + " failed with InterruptedException", e);
        }
    }

    /**
     * Create checkpoint file safely. If file already exists remove it first.
     * @param name file extension
     * @return full file path to newly created file
     * @throws IOException
     */
    private Path createCheckpointFilePathSafe(String name) throws IOException {
        Path validFilePath = new Path(getCheckpointBasePath(getSuperstep()) + "." + getTaskPartition() + name);
        // Remove the file if it already exists (it shouldn't, unless this
        // worker previously failed and is retrying)
        if (getFs().delete(validFilePath, false)) {
            LOG.warn("storeCheckpoint: Removed " + name + " file " + validFilePath);
        }
        return validFilePath;
    }

    /**
     * Returns path to saved checkpoint.
     * Doesn't check if file actually exists.
     * @param superstep saved superstep.
     * @param name extension name
     * @return full file path to checkpoint file
     */
    private Path getSavedCheckpoint(long superstep, String name) {
        return new Path(getSavedCheckpointBasePath(superstep) + "." + getTaskPartition() + name);
    }

    /**
     * Save partitions. To speed up this operation it
     * runs in multiple threads.
     */
    private void storeCheckpointVertices() {
        final int numPartitions = getPartitionStore().getNumPartitions();
        int numThreads = Math.min(GiraphConstants.NUM_CHECKPOINT_IO_THREADS.get(getConfiguration()), numPartitions);

        final Queue<Integer> partitionIdQueue = (numPartitions == 0) ? new LinkedList<Integer>()
                : new ArrayBlockingQueue<Integer>(numPartitions);
        Iterables.addAll(partitionIdQueue, getPartitionStore().getPartitionIds());

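        // The codec is looked up from the configured file-name suffix; if no
        // codec matches, getCodec() returns null and the per-partition vertex
        // files are written uncompressed.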
        final CompressionCodec codec = new CompressionCodecFactory(getConfiguration())
                .getCodec(new Path(GiraphConstants.CHECKPOINT_COMPRESSION_CODEC.get(getConfiguration())));

        long t0 = System.currentTimeMillis();

        CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
            @Override
            public Callable<Void> newCallable(int callableId) {
                return new Callable<Void>() {

                    @Override
                    public Void call() throws Exception {
                        while (!partitionIdQueue.isEmpty()) {
                            Integer partitionId = partitionIdQueue.poll();
                            if (partitionId == null) {
                                break;
                            }
                            Path path = createCheckpointFilePathSafe(
                                    "_" + partitionId + CheckpointingUtils.CHECKPOINT_VERTICES_POSTFIX);

                            FSDataOutputStream uncompressedStream = getFs().create(path);

                            DataOutputStream stream = codec == null ? uncompressedStream
                                    : new DataOutputStream(codec.createOutputStream(uncompressedStream));

                            Partition<I, V, E> partition = getPartitionStore().getOrCreatePartition(partitionId);

                            partition.write(stream);

                            getPartitionStore().putPartition(partition);

                            stream.close();
                            uncompressedStream.close();
                        }
                        return null;
                    }

                };
            }
        };

        ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "checkpoint-vertices-%d",
                getContext());

        LOG.info("Save checkpoint in " + (System.currentTimeMillis() - t0) + " ms, using " + numThreads
                + " threads");
    }

    /**
     * Load saved partitions in multiple threads.
     * @param superstep superstep to load
     * @param partitions list of partitions to load
     */
    private void loadCheckpointVertices(final long superstep, List<Integer> partitions) {
        int numThreads = Math.min(GiraphConstants.NUM_CHECKPOINT_IO_THREADS.get(getConfiguration()),
                partitions.size());

        final Queue<Integer> partitionIdQueue = new ConcurrentLinkedQueue<>(partitions);

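        // Use the same codec that was configured when the checkpoint was
        // written; a null codec means the files were stored uncompressed.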
        final CompressionCodec codec = new CompressionCodecFactory(getConfiguration())
                .getCodec(new Path(GiraphConstants.CHECKPOINT_COMPRESSION_CODEC.get(getConfiguration())));

        long t0 = System.currentTimeMillis();

        CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
            @Override
            public Callable<Void> newCallable(int callableId) {
                return new Callable<Void>() {

                    @Override
                    public Void call() throws Exception {
                        while (!partitionIdQueue.isEmpty()) {
                            Integer partitionId = partitionIdQueue.poll();
                            if (partitionId == null) {
                                break;
                            }
                            Path path = getSavedCheckpoint(superstep,
                                    "_" + partitionId + CheckpointingUtils.CHECKPOINT_VERTICES_POSTFIX);

                            FSDataInputStream compressedStream = getFs().open(path);

                            DataInputStream stream = codec == null ? compressedStream
                                    : new DataInputStream(codec.createInputStream(compressedStream));

                            Partition<I, V, E> partition = getConfiguration().createPartition(partitionId,
                                    getContext());

                            partition.readFields(stream);

                            getPartitionStore().addPartition(partition);

                            stream.close();
                        }
                        return null;
                    }

                };
            }
        };

        ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "load-vertices-%d", getContext());

        LOG.info("Loaded checkpoint in " + (System.currentTimeMillis() - t0) + " ms, using " + numThreads
                + " threads");
    }

    @Override
    public VertexEdgeCount loadCheckpoint(long superstep) {
        Path metadataFilePath = getSavedCheckpoint(superstep, CheckpointingUtils.CHECKPOINT_METADATA_POSTFIX);

        Path checkpointFilePath = getSavedCheckpoint(superstep, CheckpointingUtils.CHECKPOINT_DATA_POSTFIX);
        // Algorithm:
        // Examine all the partition owners and load the ones
        // that match my hostname and id from the master designated checkpoint
        // prefixes.
        try {
            DataInputStream metadataStream = getFs().open(metadataFilePath);

            int partitions = metadataStream.readInt();
            List<Integer> partitionIds = new ArrayList<>(partitions);
            for (int i = 0; i < partitions; i++) {
                int partitionId = metadataStream.readInt();
                partitionIds.add(partitionId);
            }

            loadCheckpointVertices(superstep, partitionIds);

            getContext().progress();

            metadataStream.close();

            DataInputStream checkpointStream = getFs().open(checkpointFilePath);
            workerContext.readFields(checkpointStream);

            // Load global stats and superstep classes
            GlobalStats globalStats = new GlobalStats();
            SuperstepClasses superstepClasses = new SuperstepClasses();
            String finalizedCheckpointPath = getSavedCheckpointBasePath(superstep)
                    + CheckpointingUtils.CHECKPOINT_FINALIZED_POSTFIX;
            DataInputStream finalizedStream = getFs().open(new Path(finalizedCheckpointPath));
            globalStats.readFields(finalizedStream);
            superstepClasses.readFields(finalizedStream);
            getConfiguration().updateSuperstepClasses(superstepClasses);
            getServerData().resetMessageStores();

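            // Restore the incoming message store of every partition, in the
            // same order it was written out by storeCheckpoint().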
            for (int i = 0; i < partitions; i++) {
                int partitionId = checkpointStream.readInt();
                getServerData().getCurrentMessageStore().readFieldsForPartition(checkpointStream, partitionId);
            }

            List<Writable> w2wMessages = (List<Writable>) WritableUtils.readList(checkpointStream);
            getServerData().getCurrentWorkerToWorkerMessages().addAll(w2wMessages);

            checkpointStream.close();

            if (LOG.isInfoEnabled()) {
                LOG.info("loadCheckpoint: Loaded " + workerGraphPartitioner.getPartitionOwners().size()
                        + " partition owners in total.");
            }

            // The communication service needs to set up its connections prior to
            // processing vertices

            workerClient.setup(getConfiguration().authenticate());

            return new VertexEdgeCount(globalStats.getVertexCount(), globalStats.getEdgeCount());

        } catch (IOException e) {
            throw new RuntimeException("loadCheckpoint: Failed for superstep=" + superstep, e);
        }
    }

    /**
     * Send the worker partitions to their destination workers
     *
     * @param workerPartitionMap Map of worker info to the partitions stored
     *        on this worker to be sent
     */
    private void sendWorkerPartitions(Map<WorkerInfo, List<Integer>> workerPartitionMap) {
        List<Entry<WorkerInfo, List<Integer>>> randomEntryList = new ArrayList<Entry<WorkerInfo, List<Integer>>>(
                workerPartitionMap.entrySet());
        Collections.shuffle(randomEntryList);
        WorkerClientRequestProcessor<I, V, E> workerClientRequestProcessor = new NettyWorkerClientRequestProcessor<I, V, E>(
                getContext(), getConfiguration(), this);
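        // Remove each outgoing partition from the local store and ship it to
        // its new owner.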
        for (Entry<WorkerInfo, List<Integer>> workerPartitionList : randomEntryList) {
            for (Integer partitionId : workerPartitionList.getValue()) {
                Partition<I, V, E> partition = getPartitionStore().removePartition(partitionId);
                if (partition == null) {
                    throw new IllegalStateException("sendWorkerPartitions: Couldn't find partition " + partitionId
                            + " to send to " + workerPartitionList.getKey());
                }
                if (LOG.isInfoEnabled()) {
                    LOG.info("sendWorkerPartitions: Sending worker " + workerPartitionList.getKey() + " partition "
                            + partitionId);
                }
                workerClientRequestProcessor.sendPartitionRequest(workerPartitionList.getKey(), partition);
            }
        }

        try {
            workerClientRequestProcessor.flush();
            workerClient.waitAllRequests();
        } catch (IOException e) {
            throw new IllegalStateException("sendWorkerPartitions: Flush failed", e);
        }
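        // Create a znode under the partition exchange path so workers waiting
        // on partitions from this worker know that all sends are complete.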
        String myPartitionExchangeDonePath = getPartitionExchangeWorkerPath(getApplicationAttempt(), getSuperstep(),
                getWorkerInfo());
        try {
            getZkExt().createExt(myPartitionExchangeDonePath, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT,
                    true);
        } catch (KeeperException e) {
            throw new IllegalStateException(
                    "sendWorkerPartitions: KeeperException to create " + myPartitionExchangeDonePath, e);
        } catch (InterruptedException e) {
            throw new IllegalStateException(
                    "sendWorkerPartitions: InterruptedException to create " + myPartitionExchangeDonePath, e);
        }
        if (LOG.isInfoEnabled()) {
            LOG.info("sendWorkerPartitions: Done sending all my partitions.");
        }
    }

    @Override
    public final void exchangeVertexPartitions(Collection<? extends PartitionOwner> masterSetPartitionOwners) {
        // 1. Fix the addresses of the partition ids if they have changed.
        // 2. Send all the partitions to their destination workers in a random
        //    fashion.
        // 3. Notify completion with a ZooKeeper stamp
        // 4. Wait for all my dependencies to be done (if any)
        // 5. Add the partitions to myself.
        PartitionExchange partitionExchange = workerGraphPartitioner.updatePartitionOwners(getWorkerInfo(),
                masterSetPartitionOwners);
        workerClient.openConnections();

        Map<WorkerInfo, List<Integer>> sendWorkerPartitionMap = partitionExchange.getSendWorkerPartitionMap();
        if (!getPartitionStore().isEmpty()) {
            sendWorkerPartitions(sendWorkerPartitionMap);
        }

        Set<WorkerInfo> myDependencyWorkerSet = partitionExchange.getMyDependencyWorkerSet();
        Set<String> workerIdSet = new HashSet<String>();
        for (WorkerInfo tmpWorkerInfo : myDependencyWorkerSet) {
            if (!workerIdSet.add(tmpWorkerInfo.getHostnameId())) {
                throw new IllegalStateException("exchangeVertexPartitions: Duplicate entry " + tmpWorkerInfo);
            }
        }
        if (myDependencyWorkerSet.isEmpty() && getPartitionStore().isEmpty()) {
            if (LOG.isInfoEnabled()) {
                LOG.info("exchangeVertexPartitions: Nothing to exchange, exiting early");
            }
            return;
        }

        String vertexExchangePath = getPartitionExchangePath(getApplicationAttempt(), getSuperstep());
        List<String> workerDoneList;
        try {
            while (true) {
                workerDoneList = getZkExt().getChildrenExt(vertexExchangePath, true, false, false);
                workerIdSet.removeAll(workerDoneList);
                if (workerIdSet.isEmpty()) {
                    break;
                }
                if (LOG.isInfoEnabled()) {
                    LOG.info("exchangeVertexPartitions: Waiting for workers " + workerIdSet);
                }
                getPartitionExchangeChildrenChangedEvent().waitForever();
                getPartitionExchangeChildrenChangedEvent().reset();
            }
        } catch (KeeperException | InterruptedException e) {
            throw new RuntimeException("exchangeVertexPartitions: Got runtime exception", e);
        }

        if (LOG.isInfoEnabled()) {
            LOG.info("exchangeVertexPartitions: Done with exchange.");
        }
    }

    /**
     * Get event when the state of a partition exchange has changed.
     *
     * @return Event to check.
     */
    public final BspEvent getPartitionExchangeChildrenChangedEvent() {
        return partitionExchangeChildrenChanged;
    }

    @Override
    protected boolean processEvent(WatchedEvent event) {
        boolean foundEvent = false;
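        // Two events are of interest here: a change in the master's job state
        // (which may force a restart) and progress in the partition exchange.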
        if (event.getPath().startsWith(masterJobStatePath) && (event.getType() == EventType.NodeChildrenChanged)) {
            if (LOG.isInfoEnabled()) {
                LOG.info("processEvent: Job state changed, checking " + "to see if it needs to restart");
            }
            JSONObject jsonObj = getJobState();
            // In pure YARN mode we have to manually commit our own output in two
            // stages, which Hadoop-based Giraph does for us, so jsonObj can be null.
            if (getConfiguration().isPureYarnJob() && null == jsonObj) {
                LOG.error("BspServiceWorker#getJobState() came back NULL.");
                return false; // the event has been processed.
            }
            try {
                if ((ApplicationState
                        .valueOf(jsonObj.getString(JSONOBJ_STATE_KEY)) == ApplicationState.START_SUPERSTEP)
                        && jsonObj.getLong(JSONOBJ_APPLICATION_ATTEMPT_KEY) != getApplicationAttempt()) {
                    LOG.fatal("processEvent: Worker will restart " + "from command - " + jsonObj.toString());
                    System.exit(-1);
                }
            } catch (JSONException e) {
                throw new RuntimeException(
                        "processEvent: Couldn't properly get job state from " + jsonObj.toString(), e);
            }
            foundEvent = true;
        } else if (event.getPath().contains(PARTITION_EXCHANGE_DIR)
                && event.getType() == EventType.NodeChildrenChanged) {
            if (LOG.isInfoEnabled()) {
                LOG.info("processEvent : partitionExchangeChildrenChanged "
                        + "(at least one worker is done sending partitions)");
            }
            partitionExchangeChildrenChanged.signal();
            foundEvent = true;
        }

        return foundEvent;
    }

    @Override
    public WorkerInfo getWorkerInfo() {
        return workerInfo;
    }

    @Override
    public PartitionStore<I, V, E> getPartitionStore() {
        return getServerData().getPartitionStore();
    }

    @Override
    public PartitionOwner getVertexPartitionOwner(I vertexId) {
        return workerGraphPartitioner.getPartitionOwner(vertexId);
    }

    @Override
    public Iterable<? extends PartitionOwner> getPartitionOwners() {
        return workerGraphPartitioner.getPartitionOwners();
    }

    @Override
    public int getPartitionId(I vertexId) {
        PartitionOwner partitionOwner = getVertexPartitionOwner(vertexId);
        return partitionOwner.getPartitionId();
    }

    @Override
    public boolean hasPartition(Integer partitionId) {
        return getPartitionStore().hasPartition(partitionId);
    }

    @Override
    public ServerData<I, V, E> getServerData() {
        return workerServer.getServerData();
    }

    @Override
    public WorkerAggregatorHandler getAggregatorHandler() {
        return globalCommHandler;
    }

    @Override
    public void prepareSuperstep() {
        if (getSuperstep() != INPUT_SUPERSTEP) {
            globalCommHandler.prepareSuperstep(workerAggregatorRequestProcessor);
        }
    }

    @Override
    public SuperstepOutput<I, V, E> getSuperstepOutput() {
        return superstepOutput;
    }

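    // Global stats for the previous superstep are read from the znode the
    // master writes when it finishes a superstep; before the first real
    // superstep (or right after a restart) there is nothing to read yet.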
    @Override
    public GlobalStats getGlobalStats() {
        GlobalStats globalStats = new GlobalStats();
        if (getSuperstep() > Math.max(INPUT_SUPERSTEP, getRestartedSuperstep())) {
            String superstepFinishedNode = getSuperstepFinishedPath(getApplicationAttempt(), getSuperstep() - 1);
            WritableUtils.readFieldsFromZnode(getZkExt(), superstepFinishedNode, false, null, globalStats);
        }
        return globalStats;
    }
}