Source code

Java tutorial


Here is the source code for org.apache.giraph.partition.VertexTypeStore.java


/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.giraph.partition;

import org.apache.giraph.bsp.CentralizedServiceWorker;
import org.apache.giraph.comm.requests.SendTokenDepRequest;
import org.apache.giraph.conf.GiraphConfiguration;
import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.graph.Vertex;
import org.apache.giraph.utils.ByteArrayVertexIdNullData;
import org.apache.giraph.utils.VertexIdData;
import org.apache.giraph.worker.WorkerInfo;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.log4j.Logger;

import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;

import java.util.Set;

/**
 * YH: Stores and tracks vertex types according to vertex id.
 *
 * @param <I> Vertex id
 * @param <V> Vertex value
 * @param <E> Edge value
 */
public class VertexTypeStore<I extends WritableComparable, V extends Writable, E extends Writable> {
     * The possible types of vertices. Used for token serializability.
    public static enum VertexType {
        /** all neighbours are in same partition */
        /** all neighbours are in same worker (but different partitions) */
        /** all neighbours are in remote workers (NONE in same worker) */
        /** neighbours in both remote and local workers */

    /** Class logger */
    private static final Logger LOG = Logger.getLogger(VertexTypeStore.class);

    /** Provided configuration */
    private final ImmutableClassesGiraphConfiguration<I, V, E> conf;
    /** Service worker */
    private final CentralizedServiceWorker<I, V, E> serviceWorker;

    // With hash partitioning, edge cuts are extremely high, meaning
    // 99%+ of vertices are usually local/remote/mixed boundary.
    // However, we can only implicitly track internal vertices
    // (i.e., not being in the sets => vertex is internal), because
    // we choose to send messages only for notifying dependencies
    // and no messages otherwise.
    /** Set of local boundary vertex ids (owned by this worker only) */
    private Set localBoundaryVertices;
    /** Set of remote boundary vertex ids (owned by this worker only) */
    private Set remoteBoundaryVertices;
    /** Set of mixed boundary vertex ids (owned by this worker only) */
    private Set mixedBoundaryVertices;

     * Constructor.
     * @param conf Configuration used.
     * @param serviceWorker Service worker
    public VertexTypeStore(ImmutableClassesGiraphConfiguration<I, V, E> conf,
            CentralizedServiceWorker<I, V, E> serviceWorker) {
        this.conf = conf;
        this.serviceWorker = serviceWorker;

        if (conf.getAsyncConf().tokenSerialized()) {
            Class<I> vertexIdClass = conf.getVertexIdClass();
            if (vertexIdClass.equals(IntWritable.class)) {
                localBoundaryVertices = new IntOpenHashSet();
                remoteBoundaryVertices = new IntOpenHashSet();
                mixedBoundaryVertices = new IntOpenHashSet();
            } else if (vertexIdClass.equals(LongWritable.class)) {
                localBoundaryVertices = new LongOpenHashSet();
                remoteBoundaryVertices = new LongOpenHashSet();
                mixedBoundaryVertices = new LongOpenHashSet();
            } else {
                localBoundaryVertices = new ObjectOpenHashSet<I>();
                remoteBoundaryVertices = new ObjectOpenHashSet<I>();
                mixedBoundaryVertices = new ObjectOpenHashSet<I>();

     * Add/track a vertex and its type.
     * @param vertex Vertex to add
    public void addVertex(Vertex<I, V, E> vertex) {
        boolean isRemoteBoundary = false;
        boolean isLocalBoundary = false;

        int partitionId = serviceWorker.getVertexPartitionOwner(vertex.getId()).getPartitionId();
        WorkerInfo myWorker = serviceWorker.getWorkerInfo();

        // handle out-edge depedencies first
        // in-edge handled later via sendDependencies()
        for (Edge<I, E> e : vertex.getEdges()) {
            PartitionOwner dstOwner = serviceWorker.getVertexPartitionOwner(e.getTargetVertexId());
            int dstPartitionId = dstOwner.getPartitionId();
            WorkerInfo dstWorker = dstOwner.getWorkerInfo();

            // check if neighbour is remote; if not,
            // check if neighbour is in another local partition
            if (!myWorker.equals(dstWorker)) {
                isRemoteBoundary = true;
            } else if (dstPartitionId != partitionId) {
                isLocalBoundary = true;

            // need to check all edges before concluding vertex
            // is ONLY local or remote boundary, but if it's
            // already both/mixed, we can quit early
            if (isRemoteBoundary && isLocalBoundary) {

        if (!isRemoteBoundary && !isLocalBoundary) {
            setVertexType(vertex.getId(), VertexType.INTERNAL);
        } else if (!isRemoteBoundary && isLocalBoundary) {
            setVertexType(vertex.getId(), VertexType.LOCAL_BOUNDARY);
        } else if (isRemoteBoundary && !isLocalBoundary) {
            setVertexType(vertex.getId(), VertexType.REMOTE_BOUNDARY);
        } else {
            setVertexType(vertex.getId(), VertexType.MIXED_BOUNDARY);

     * For every vertex, send message to neighbours that are not
     * on the same partition. Required for directed graphs.
     * This ensures that every vertex is categorized based on
     * both their in-edge and out-edge neighbours.
    public void sendDependencies() {
        // when to flush cache/send message to particular worker
        int maxMessagesSizePerWorker = GiraphConfiguration.MAX_MSG_REQUEST_SIZE.get(conf);

        // cache for messages that will be sent to neighbours
        // (much more efficient than using SendDataCache b/c
        //  we don't need to store/know dst partition ids)
        Int2ObjectOpenHashMap<VertexIdData<I, NullWritable>> msgCache = new Int2ObjectOpenHashMap<VertexIdData<I, NullWritable>>();

        int taskId = serviceWorker.getWorkerInfo().getTaskId();
        PartitionStore<I, V, E> pStore = serviceWorker.getPartitionStore();

        // don't need to synchronize, b/c this is all single threaded
        for (int partitionId : pStore.getPartitionIds()) {
            for (Vertex<I, V, E> vertex : pStore.getOrCreatePartition(partitionId)) {
                for (Edge<I, E> e : vertex.getEdges()) {
                    PartitionOwner dstOwner = serviceWorker.getVertexPartitionOwner(e.getTargetVertexId());
                    int dstPartitionId = dstOwner.getPartitionId();
                    int dstTaskId = dstOwner.getWorkerInfo().getTaskId();

                    if (dstPartitionId == partitionId) {
                        continue; // skip, no dependency
                    } else if (taskId == dstTaskId) {
                        // local dependency
                        receiveDependency(e.getTargetVertexId(), true);
                    } else {
                        // remote dependency
                        // We may send redundant dependency messages, but
                        // it's faster to send it than to prune it.
                        VertexIdData<I, NullWritable> messages = msgCache.get(dstTaskId);
                        if (messages == null) {
                            messages = new ByteArrayVertexIdNullData<I>();
                            msgCache.put(dstTaskId, messages);

                        // no data---messages are vertex id only
                        messages.add(e.getTargetVertexId(), NullWritable.get());

                        if (messages.getSize() > maxMessagesSizePerWorker) {
                                    new SendTokenDepRequest(messages));

        // send remaining messages
        for (int dstTaskId : msgCache.keySet()) {
            // no need to remove, map will be trashed entirely
            VertexIdData<I, NullWritable> messages = msgCache.get(dstTaskId);
            serviceWorker.getWorkerClient().sendWritableRequest(dstTaskId, new SendTokenDepRequest(messages));

        // flush network

     * Process a received dependency.
     * @param vertexId Id of vertex that has dependency
     * @param isLocal True of dependency is local (within same worker)
    public synchronized void receiveDependency(I vertexId, boolean isLocal) {
        // Function must be completely synchronized b/c of concurrent
        // gets and sets/mutations to multiple sets
        VertexType type = getVertexType(vertexId);

        if (isLocal) {
            // remote -> mixed, internal -> local
            // (remote -> mixed is b/c we now have local dependency)
            switch (type) {
            case REMOTE_BOUNDARY:
                synchronizedRemove(remoteBoundaryVertices, vertexId);
                synchronizedAdd(mixedBoundaryVertices, vertexId);
            case LOCAL_BOUNDARY:
                // fall through
            case MIXED_BOUNDARY:
                synchronizedAdd(localBoundaryVertices, vertexId);
        } else {
            // local -> mixed, internal -> remote
            // (local -> mixed is b/c we now have remote dependency)
            switch (type) {
            case LOCAL_BOUNDARY:
                synchronizedRemove(localBoundaryVertices, vertexId);
                synchronizedAdd(mixedBoundaryVertices, vertexId);
            case REMOTE_BOUNDARY:
                // fall through
            case MIXED_BOUNDARY:
                synchronizedAdd(remoteBoundaryVertices, vertexId);

     * Prints all stored dependencies. Not thread safe.
    public void printAll() {
        for (Object i : localBoundaryVertices) {
  "[[VSTORE]] loc " + i);
        for (Object i : remoteBoundaryVertices) {
  "[[VSTORE]] rem " + i);
        for (Object i : mixedBoundaryVertices) {
  "[[VSTORE]] mix " + i);

     * Set/tag a vertex id with the specified type.
     * @param vertexId Vertex id
     * @param type Vertex type
    public void setVertexType(I vertexId, VertexType type) {
        switch (type) {
        case INTERNAL:
            // implicit (absence from all sets indicates internal)
        case LOCAL_BOUNDARY:
            synchronizedAdd(localBoundaryVertices, vertexId);
        case REMOTE_BOUNDARY:
            synchronizedAdd(remoteBoundaryVertices, vertexId);
        case MIXED_BOUNDARY:
            synchronizedAdd(mixedBoundaryVertices, vertexId);
            throw new RuntimeException("Invalid vertex type!");

     * Whether a vertex id belongs to a particular vertex type.
     * Thread-safe for concurrent is/getVertexType() calls ONLY.
     * This is NOT thread-safe with concurrent setVertexType() calls!
     * @param vertexId Vertex id
     * @param type Vertex type
     * @return True if vertexId has a matching vertex type
    public boolean isVertexType(I vertexId, VertexType type) {
        // don't need synchronize b/c only time we write
        // to these sets is during input loading or mutations,
        // when no compute threads are running, and we read only
        // during computation (when compute threads are running)
        switch (type) {
        case INTERNAL:
            return !(contains(localBoundaryVertices, vertexId) || contains(remoteBoundaryVertices, vertexId)
                    || contains(mixedBoundaryVertices, vertexId));
        case LOCAL_BOUNDARY:
            return contains(localBoundaryVertices, vertexId);
        case REMOTE_BOUNDARY:
            return contains(remoteBoundaryVertices, vertexId);
        case MIXED_BOUNDARY:
            return contains(mixedBoundaryVertices, vertexId);
            throw new RuntimeException("Invalid vertex type!");

     * Get vertex type for specified vertex id.
     * Thread-safe for concurrent is/getVertexType() calls ONLY.
     * This is NOT thread-safe with concurrent setVertexType() calls!
     * @param vertexId Vertex id
     * @return Type of vertex
    public VertexType getVertexType(I vertexId) {
        if (contains(localBoundaryVertices, vertexId)) {
            return VertexType.LOCAL_BOUNDARY;
        } else if (contains(remoteBoundaryVertices, vertexId)) {
            return VertexType.REMOTE_BOUNDARY;
        } else if (contains(mixedBoundaryVertices, vertexId)) {
            return VertexType.MIXED_BOUNDARY;
        } else {
            // TODO-YH: this does NOT currently support graph mutations!
            // Mutations can add vertices, which will be erroneously treated
            // as internal. Handling mutations requires proper re-check of
            // all out-edge AND in-edge neighbours.
            return VertexType.INTERNAL;

     * Checks if vertex id exists in a set. Not synchronized.
     * Thread-safe iff set will not be concurrently mutated.
     * @param set Set to check
     * @param vertexId Vertex id to look for
     * @return True if vertexId is in set
    private boolean contains(Set set, I vertexId) {
        Class<I> vertexIdClass = conf.getVertexIdClass();
        if (vertexIdClass.equals(IntWritable.class)) {
            return set.contains(((IntWritable) vertexId).get());
        } else if (vertexIdClass.equals(LongWritable.class)) {
            return set.contains(((LongWritable) vertexId).get());
        } else {
            return set.contains(vertexId);

     * Adds vertexId to set, synchronizing on set.
     * @param set Set to add to
     * @param vertexId Vertex id to add
    private void synchronizedAdd(Set set, I vertexId) {
        Class<I> vertexIdClass = conf.getVertexIdClass();
        synchronized (set) {
            if (vertexIdClass.equals(IntWritable.class)) {
                set.add(((IntWritable) vertexId).get());
            } else if (vertexIdClass.equals(LongWritable.class)) {
                set.add(((LongWritable) vertexId).get());
            } else {
                // need to clone id when not primitive
                set.add(WritableUtils.clone(vertexId, conf));

     * Removes vertexId from set, synchronizing on set.
     * @param set Set to remove from
     * @param vertexId Vertex id to remove
    private void synchronizedRemove(Set set, I vertexId) {
        Class<I> vertexIdClass = conf.getVertexIdClass();
        synchronized (set) {
            if (vertexIdClass.equals(IntWritable.class)) {
                set.remove(((IntWritable) vertexId).get());
            } else if (vertexIdClass.equals(LongWritable.class)) {
                set.remove(((LongWritable) vertexId).get());
            } else {

     * @return Number of local (only) boundary vertices.
    public int numLocalBoundaryVertices() {
        return localBoundaryVertices.size();

     * @return Number of remote (only) boundary vertices.
    public int numRemoteBoundaryVertices() {
        return remoteBoundaryVertices.size();

     * @return Number of mixed boundary vertices.
    public int numMixedBoundaryVertices() {
        return mixedBoundaryVertices.size();