org.apache.giraph.block_app.library.prepare_graph.UndirectedConnectedComponents.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.giraph.block_app.library.prepare_graph.UndirectedConnectedComponents.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.giraph.block_app.library.prepare_graph;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.giraph.block_app.framework.api.BlockMasterApi;
import org.apache.giraph.block_app.framework.api.BlockWorkerReceiveApi;
import org.apache.giraph.block_app.framework.api.BlockWorkerSendApi;
import org.apache.giraph.block_app.framework.api.CreateReducersApi;
import org.apache.giraph.block_app.framework.block.Block;
import org.apache.giraph.block_app.framework.block.RepeatUntilBlock;
import org.apache.giraph.block_app.framework.block.SequenceBlock;
import org.apache.giraph.block_app.framework.piece.Piece;
import org.apache.giraph.block_app.framework.piece.global_comm.ReducerHandle;
import org.apache.giraph.block_app.framework.piece.interfaces.VertexReceiver;
import org.apache.giraph.block_app.framework.piece.interfaces.VertexSender;
import org.apache.giraph.block_app.library.Pieces;
import org.apache.giraph.block_app.library.SendMessageChain;
import org.apache.giraph.block_app.library.VertexSuppliers;
import org.apache.giraph.block_app.reducers.map.BasicMapReduce;
import org.apache.giraph.combiner.MessageCombiner;
import org.apache.giraph.combiner.MinMessageCombiner;
import org.apache.giraph.combiner.SumMessageCombiner;
import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.function.Consumer;
import org.apache.giraph.function.ObjectTransfer;
import org.apache.giraph.function.Supplier;
import org.apache.giraph.function.vertex.ConsumerWithVertex;
import org.apache.giraph.function.vertex.SupplierFromVertex;
import org.apache.giraph.graph.Vertex;
import org.apache.giraph.reducers.impl.MaxPairReducer;
import org.apache.giraph.reducers.impl.SumReduce;
import org.apache.giraph.types.NoMessage;
import org.apache.giraph.types.ops.LongTypeOps;
import org.apache.giraph.types.ops.NumericTypeOps;
import org.apache.giraph.types.ops.TypeOps;
import org.apache.giraph.writable.tuple.LongLongWritable;
import org.apache.giraph.writable.tuple.PairWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.log4j.Logger;

import com.google.common.collect.Iterators;

/**
 * Class for creating utility blocks for calculating and processing
 * connected components.
 *
 * Graph is expected to be symmetric before calling any of the methods here.
 */
public class UndirectedConnectedComponents {
    private static final Logger LOG = Logger.getLogger(UndirectedConnectedComponents.class);

    private UndirectedConnectedComponents() {
    }

    /**
     * Builds the piece that seeds each vertex's component before propagation.
     *
     * On receive, every vertex sets its component to the smallest id among its
     * own id and the target ids of the edges returned by {@code edgeSupplier}.
     *
     * @param idTypeOps used to create and copy id values
     * @param vertexUpdatedComponent receives {@code true} when some neighbor id
     *        was smaller than the vertex's own id, {@code false} otherwise
     * @param setComponent stores the chosen component on the vertex
     * @param edgeSupplier provides the edges to inspect for a vertex
     * @return piece named "InitializeCC"
     */
    private static <I extends WritableComparable, V extends Writable> Piece<I, V, Writable, NoMessage, Object> createInitializePiece(
            TypeOps<I> idTypeOps, Consumer<Boolean> vertexUpdatedComponent,
            ConsumerWithVertex<I, V, Writable, I> setComponent,
            SupplierFromVertex<I, V, Writable, ? extends Iterable<? extends Edge<I, ?>>> edgeSupplier) {
        // Single scratch id captured by the lambda and overwritten for each vertex.
        final I smallestId = idTypeOps.create();
        return Pieces.forAllVerticesOnReceive("InitializeCC", vertex -> {
            boolean neighborIsSmaller = false;
            idTypeOps.set(smallestId, vertex.getId());
            for (Edge<I, ?> neighbor : edgeSupplier.get(vertex)) {
                final I neighborId = neighbor.getTargetVertexId();
                if (smallestId.compareTo(neighborId) <= 0) {
                    // Current minimum is already at least as small.
                    continue;
                }
                idTypeOps.set(smallestId, neighborId);
                neighborIsSmaller = true;
            }
            setComponent.apply(vertex, smallestId);
            vertexUpdatedComponent.apply(neighborIsSmaller);
        });
    }

    /**
     * Piece that sends a vertex's current component id along its edges and
     * lets each receiving vertex adopt the smallest id it has seen.
     *
     * A vertex only sends when {@code vertexToPropagate} yields true. The
     * number of sending vertices is summed, and the master reports convergence
     * when that sum is zero.
     *
     * @param <I> vertex id type; also the component and message type
     * @param <V> vertex value type
     */
    private static class PropagateConnectedComponentsPiece<I extends WritableComparable, V extends Writable>
            extends Piece<I, V, Writable, I, Object> {
        /** Creates and copies id values, and exposes their class. */
        private final TypeOps<I> idTypeOps;
        /** Min combiner for messages, or null when ids are not numeric. */
        private final MinMessageCombiner<I, I> minMessageCombiner;
        /** Read on send: should the current vertex propagate its component. */
        private final Supplier<Boolean> vertexToPropagate;
        /** Written on receive: did the current vertex lower its component. */
        private final Consumer<Boolean> vertexUpdatedComponent;
        /** Written on master: true when no vertex sent in this iteration. */
        private final Consumer<Boolean> converged;
        /** Reads the component currently stored on a vertex. */
        private final SupplierFromVertex<I, V, Writable, I> getComponent;
        /** Stores a new component on a vertex. */
        private final ConsumerWithVertex<I, V, Writable, I> setComponent;
        /** Provides the edges along which a vertex propagates. */
        private final SupplierFromVertex<I, V, Writable, ? extends Iterable<? extends Edge<I, ?>>> edgeSupplier;

        /** Sums the number of vertices that sent messages. */
        private ReducerHandle<LongWritable, LongWritable> propagatedAggregator;

        PropagateConnectedComponentsPiece(TypeOps<I> idTypeOps, Supplier<Boolean> vertexToPropagate,
                Consumer<Boolean> vertexUpdatedComponent, Consumer<Boolean> converged,
                SupplierFromVertex<I, V, Writable, I> getComponent,
                ConsumerWithVertex<I, V, Writable, I> setComponent,
                SupplierFromVertex<I, V, Writable, ? extends Iterable<? extends Edge<I, ?>>> edgeSupplier) {
            this.idTypeOps = idTypeOps;
            // A min combiner can only be built from numeric type ops.
            if (idTypeOps instanceof NumericTypeOps) {
                this.minMessageCombiner = new MinMessageCombiner<>((NumericTypeOps<I>) idTypeOps);
            } else {
                this.minMessageCombiner = null;
            }
            this.vertexToPropagate = vertexToPropagate;
            this.vertexUpdatedComponent = vertexUpdatedComponent;
            this.converged = converged;
            this.getComponent = getComponent;
            this.setComponent = setComponent;
            this.edgeSupplier = edgeSupplier;
        }

        @Override
        public void registerReducers(CreateReducersApi reduceApi, Object executionStage) {
            propagatedAggregator = reduceApi.createLocalReducer(SumReduce.LONG);
        }

        @Override
        public VertexSender<I, V, Writable> getVertexSender(final BlockWorkerSendApi<I, V, Writable, I> workerApi,
                Object executionStage) {
            final LongWritable senderCount = new LongWritable(1);
            return vertex -> {
                if (!vertexToPropagate.get()) {
                    return;
                }
                // One message (the current component) to every edge target.
                workerApi.sendMessageToMultipleEdges(
                        Iterators.transform(edgeSupplier.get(vertex).iterator(),
                                neighbor -> neighbor.getTargetVertexId()),
                        getComponent.get(vertex));
                propagatedAggregator.reduce(senderCount);
            };
        }

        @Override
        public void masterCompute(BlockMasterApi master, Object executionStage) {
            final boolean nobodySent = propagatedAggregator.getReducedValue(master).get() == 0;
            converged.apply(nobodySent);
            LOG.info("Undirected CC: " + propagatedAggregator.getReducedValue(master).get()
                    + " many vertices sent in this iteration");
        }

        @Override
        public VertexReceiver<I, V, Writable, I> getVertexReceiver(BlockWorkerReceiveApi<I> workerApi,
                Object executionStage) {
            return new InnerVertexReceiver() {
                /** Scratch id reused across vertexReceive calls for the running minimum. */
                private final I smallest = idTypeOps.create();

                @Override
                public void vertexReceive(Vertex<I, V, Writable> vertex, Iterable<I> messages) {
                    // Start from the stored component and fold in every message.
                    idTypeOps.set(smallest, getComponent.get(vertex));
                    for (I candidate : messages) {
                        if (smallest.compareTo(candidate) <= 0) {
                            continue;
                        }
                        idTypeOps.set(smallest, candidate);
                    }

                    final I current = getComponent.get(vertex);
                    final boolean improved = current.compareTo(smallest) > 0;
                    if (improved) {
                        setComponent.apply(vertex, smallest);
                    }
                    vertexUpdatedComponent.apply(improved);
                }
            };
        }

        @Override
        public MessageCombiner<? super I, I> getMessageCombiner(ImmutableClassesGiraphConfiguration conf) {
            return minMessageCombiner;
        }

        /**
         * Returns the id class only when no combiner is in use, null otherwise.
         * NOTE(review): presumably the framework derives the message type from
         * the combiner when one is present - confirm against Piece.
         */
        @Override
        public Class<I> getMessageClass() {
            if (minMessageCombiner != null) {
                return null;
            }
            return idTypeOps.getTypeClass();
        }

        @Override
        protected boolean allowOneMessageToManyIdsEncoding() {
            return true;
        }
    }

    /**
     * Piece that counts the vertices it runs on and how many of them are the
     * representative of their component (vertex id equal to component id),
     * then hands both totals to the given consumers on the master.
     *
     * @param <V> vertex value type
     */
    public static final class CalculateNumberOfComponents<V extends Writable>
            extends Piece<LongWritable, V, Writable, LongWritable, Object> {
        /** Receives the total number of vertices seen on send. */
        private final Consumer<LongWritable> numActiveConsumer;
        /** Receives the number of vertices whose id equals their component. */
        private final Consumer<LongWritable> numComponentsConsumer;
        /** Reads the component stored on a vertex. */
        private final SupplierFromVertex<LongWritable, V, Writable, LongWritable> getComponent;

        private ReducerHandle<LongWritable, LongWritable> numComponentsAggregator;
        private ReducerHandle<LongWritable, LongWritable> numActiveAggregator;

        /**
         * @param numActiveConsumer consumer of the vertex count
         * @param numComponentsConsumer consumer of the component count
         * @param getComponent reads the component id of a vertex
         */
        public CalculateNumberOfComponents(Consumer<LongWritable> numActiveConsumer,
                Consumer<LongWritable> numComponentsConsumer,
                SupplierFromVertex<LongWritable, V, Writable, LongWritable> getComponent) {
            this.numActiveConsumer = numActiveConsumer;
            this.numComponentsConsumer = numComponentsConsumer;
            this.getComponent = getComponent;
        }

        @Override
        public void registerReducers(CreateReducersApi reduceApi, Object executionStage) {
            // Two independent long sums, registered in this order.
            numComponentsAggregator = reduceApi.createLocalReducer(SumReduce.LONG);
            numActiveAggregator = reduceApi.createLocalReducer(SumReduce.LONG);
        }

        @Override
        public VertexSender<LongWritable, V, Writable> getVertexSender(
                BlockWorkerSendApi<LongWritable, V, Writable, LongWritable> workerApi, Object executionStage) {
            final LongWritable unit = new LongWritable(1);
            return new InnerVertexSender() {
                @Override
                public void vertexSend(Vertex<LongWritable, V, Writable> vertex) {
                    numActiveAggregator.reduce(unit);
                    final long ownId = vertex.getId().get();
                    final long componentId = getComponent.get(vertex).get();
                    // A component is counted once, by the vertex that carries its id.
                    if (ownId == componentId) {
                        numComponentsAggregator.reduce(unit);
                    }
                }
            };
        }

        @Override
        public void masterCompute(BlockMasterApi master, Object executionStage) {
            // Publish both totals first, then log them.
            numActiveConsumer.apply(numActiveAggregator.getReducedValue(master));
            numComponentsConsumer.apply(numComponentsAggregator.getReducedValue(master));
            LOG.info("Num active is : " + numActiveAggregator.getReducedValue(master));
            LOG.info("Num components is : " + numComponentsAggregator.getReducedValue(master));
        }

        /**
         * Declares a long-sum combiner. No message is sent by the sender defined
         * in this class.
         */
        @Override
        protected MessageCombiner<? super LongWritable, LongWritable> getMessageCombiner(
                ImmutableClassesGiraphConfiguration conf) {
            return SumMessageCombiner.LONG;
        }
    }

    /**
     * Builds a block that computes connected components over the edges given
     * by {@code edgeSupplier}: one initialization piece followed by the
     * propagation piece repeated until convergence or {@code maxIterations}.
     *
     * Graph is expected to be symmetric.
     *
     * @param maxIterations upper bound on propagation iterations
     * @param idTypeOps type operations for the vertex id type
     * @param getComponent reads the component stored on a vertex
     * @param setComponent stores a component on a vertex
     * @param edgeSupplier provides the edges to follow for a vertex
     * @return block performing the whole calculation
     */
    public static <I extends WritableComparable, V extends Writable> Block calculateConnectedComponents(
            int maxIterations, TypeOps<I> idTypeOps, final SupplierFromVertex<I, V, Writable, I> getComponent,
            final ConsumerWithVertex<I, V, Writable, I> setComponent,
            SupplierFromVertex<I, V, Writable, ? extends Iterable<? extends Edge<I, ?>>> edgeSupplier) {
        final ObjectTransfer<Boolean> hasConverged = new ObjectTransfer<>();
        // Written by a vertex on receive and read by it on the following send,
        // hence the same transfer object is passed in both roles below.
        final ObjectTransfer<Boolean> updatedFlag = new ObjectTransfer<>();

        final Piece<I, V, Writable, NoMessage, Object> initialize =
                createInitializePiece(idTypeOps, updatedFlag, setComponent, edgeSupplier);
        final PropagateConnectedComponentsPiece<I, V> propagate =
                new PropagateConnectedComponentsPiece<>(idTypeOps, updatedFlag, updatedFlag, hasConverged,
                        getComponent, setComponent, edgeSupplier);
        final Block propagateUntilStable = new RepeatUntilBlock(maxIterations, propagate, hasConverged);

        return new SequenceBlock(initialize, propagateUntilStable);
    }

    /**
     * Convenience overload that follows the edges returned by
     * {@link VertexSuppliers#vertexEdgesSupplier()}.
     *
     * @param maxIterations upper bound on propagation iterations
     * @param idTypeOps type operations for the vertex id type
     * @param getComponent reads the component stored on a vertex
     * @param setComponent stores a component on a vertex
     * @return block performing the whole calculation
     */
    public static <I extends WritableComparable, V extends Writable> Block calculateConnectedComponents(
            int maxIterations, TypeOps<I> idTypeOps, final SupplierFromVertex<I, V, Writable, I> getComponent,
            final ConsumerWithVertex<I, V, Writable, I> setComponent) {
        return UndirectedConnectedComponents.calculateConnectedComponents(
                maxIterations,
                idTypeOps,
                getComponent,
                setComponent,
                VertexSuppliers.vertexEdgesSupplier());
    }

    /**
     * Convenience overload for graphs with {@link LongWritable} vertex ids,
     * using {@link LongTypeOps#INSTANCE} and the default edge supplier.
     *
     * @param maxIterations upper bound on propagation iterations
     * @param getComponent reads the component stored on a vertex
     * @param setComponent stores a component on a vertex
     * @return block performing the whole calculation
     */
    public static <V extends Writable> Block calculateConnectedComponents(int maxIterations,
            SupplierFromVertex<LongWritable, V, Writable, LongWritable> getComponent,
            ConsumerWithVertex<LongWritable, V, Writable, LongWritable> setComponent) {
        // The four-argument overload supplies VertexSuppliers.vertexEdgesSupplier().
        return calculateConnectedComponents(maxIterations, LongTypeOps.INSTANCE, getComponent, setComponent);
    }

    /**
     * Computes the size of every component with a map reduction (component id
     * to summed count) and then lets each vertex consume the size of its own
     * component. Unlike the "CalcComponentSizesPiece" step used elsewhere in
     * this class, the counts are combined through a reducer rather than by
     * sending messages to the vertex whose id is the component id.
     *
     * @param getComponent reads the component stored on a vertex
     * @param sizeConsumer receives, per vertex, the size of its component; the
     *        value object passed in is reused between calls
     * @return block named "CalcConnectedComponentSizes"
     */
    public static <V extends Writable> Block calculateConnectedComponentSizes(
            SupplierFromVertex<LongWritable, V, Writable, LongWritable> getComponent,
            ConsumerWithVertex<LongWritable, V, Writable, LongWritable> sizeConsumer) {
        // Reused (component id, 1) entry; only the left side changes per vertex.
        final Pair<LongWritable, LongWritable> entry = Pair.of(new LongWritable(), new LongWritable(1));
        final LongWritable sizeHolder = new LongWritable();
        return Pieces.reduceAndBroadcast("CalcConnectedComponentSizes",
                new BasicMapReduce<>(LongTypeOps.INSTANCE, LongTypeOps.INSTANCE, SumReduce.LONG),
                (Vertex<LongWritable, V, Writable> vertex) -> {
                    entry.getLeft().set(getComponent.get(vertex).get());
                    return entry;
                },
                (vertex, sizesByComponent) -> {
                    sizeHolder.set(sizesByComponent.get(getComponent.get(vertex)).get());
                    sizeConsumer.apply(vertex, sizeHolder);
                });
    }

    /**
     * Given a graph with already calculated connected components, reduces
     * (component id, size) pairs and passes the resulting pair to
     * {@code largestComponentConsumer}.
     *
     * Every vertex sends 1 to the vertex whose id is its component id, summed
     * by the combiner. Each vertex then contributes a pair of its component id
     * and the sum it received, or 0 when it received nothing.
     * NOTE(review): MaxPairReducer presumably keeps the pair with the largest
     * right-hand value (the size) - confirm against its documentation.
     *
     * @param getComponent reads the component stored on a vertex
     * @param largestComponentConsumer receives the reduced pair; callers in
     *        this class read the left side as the component id and the right
     *        side as its size
     * @return block performing the send and the reduction
     */
    public static <V extends Writable> Block calculateLargestConnectedComponentStats(
            SupplierFromVertex<LongWritable, V, Writable, LongWritable> getComponent,
            Consumer<PairWritable<LongWritable, LongWritable>> largestComponentConsumer) {
        final LongWritable unit = new LongWritable(1);
        // Reused pair instance returned from the reduction supplier.
        final LongLongWritable idAndSize = new LongLongWritable();
        return SendMessageChain
                .<LongWritable, V, Writable, LongWritable>startSend("CalcComponentSizesPiece",
                        SumMessageCombiner.LONG,
                        vertex -> unit,
                        vertex -> Iterators.singletonIterator(getComponent.get(vertex)))
                .endReduce("CalcLargestComponent",
                        new MaxPairReducer<>(LongTypeOps.INSTANCE, LongTypeOps.INSTANCE),
                        (vertex, message) -> {
                            final long receivedSize = message == null ? 0 : message.get();
                            idAndSize.getLeft().set(getComponent.get(vertex).get());
                            idAndSize.getRight().set(receivedSize);
                            return idAndSize;
                        },
                        largestComponentConsumer);
    }

    /**
     * Given a graph with already calculated connected components, stores the
     * id of the largest one into {@code largestComponent}.
     *
     * @param getComponent reads the component stored on a vertex
     * @param largestComponent holder that is overwritten with the component id
     * @return block performing the calculation
     */
    public static <V extends Writable> Block calculateLargestConnectedComponent(
            SupplierFromVertex<LongWritable, V, Writable, LongWritable> getComponent,
            LongWritable largestComponent) {
        return calculateLargestConnectedComponentStats(getComponent, idAndSize -> {
            // Left side of the reduced pair is the component id.
            largestComponent.set(idAndSize.getLeft().get());
        });
    }

    /**
     * Given a graph with already calculated connected components, stores the
     * size of the largest one into {@code largestComponentSize}.
     *
     * @param getComponent reads the component stored on a vertex
     * @param largestComponentSize holder that is overwritten with the size
     * @return block performing the calculation
     */
    public static <V extends Writable> Block calculateLargestConnectedComponentSize(
            SupplierFromVertex<LongWritable, V, Writable, LongWritable> getComponent,
            LongWritable largestComponentSize) {
        return calculateLargestConnectedComponentStats(getComponent, idAndSize -> {
            // Right side of the reduced pair is the component size.
            largestComponentSize.set(idAndSize.getRight().get());
        });
    }

    /**
     * Takes a symmetric graph and removes every vertex whose component is not
     * the largest connected component.
     *
     * Runs three stages in sequence: compute components, find the id of the
     * largest one, then remove vertices whose component differs from it.
     *
     * @param maxIterations upper bound on propagation iterations
     * @param getComponent reads the component stored on a vertex
     * @param setComponent stores a component on a vertex
     * @return block performing all three stages
     */
    public static <V extends Writable> Block calculateAndKeepLargestComponent(int maxIterations,
            SupplierFromVertex<LongWritable, V, Writable, LongWritable> getComponent,
            ConsumerWithVertex<LongWritable, V, Writable, LongWritable> setComponent) {
        // Filled in by the second stage, read by the third.
        final LongWritable largestId = new LongWritable();

        final Block computeComponents = calculateConnectedComponents(maxIterations, LongTypeOps.INSTANCE,
                getComponent, setComponent, VertexSuppliers.vertexEdgesSupplier());
        final Block findLargest = calculateLargestConnectedComponent(getComponent, largestId);
        final Block dropOthers = Pieces.<LongWritable, V, Writable>removeVertices("KeepOnlyLargestComponent",
                vertex -> !largestId.equals(getComponent.get(vertex)));

        return new SequenceBlock(computeComponents, findLargest, dropOthers);
    }

    /**
     * Takes a symmetric graph and removes every vertex that belongs to a
     * connected component whose size is smaller than {@code threshold}.
     *
     * Runs three stages in sequence: compute components, compute each
     * component's size, then remove the vertices flagged as below threshold.
     *
     * @param maxIterations upper bound on propagation iterations
     * @param threshold minimum component size to keep
     * @param getComponent reads the component stored on a vertex
     * @param setComponent stores a component on a vertex
     * @return block performing all three stages
     */
    public static <V extends Writable> Block calculateAndKeepComponentAboveThreshold(int maxIterations,
            int threshold, SupplierFromVertex<LongWritable, V, Writable, LongWritable> getComponent,
            ConsumerWithVertex<LongWritable, V, Writable, LongWritable> setComponent) {
        // Per-vertex flag: written by the size stage, read by the removal stage.
        final ObjectTransfer<Boolean> tooSmall = new ObjectTransfer<>();

        final Block computeComponents =
                calculateConnectedComponents(maxIterations, LongTypeOps.INSTANCE, getComponent, setComponent);
        final Block flagSmallComponents = calculateConnectedComponentSizes(getComponent,
                (vertex, size) -> tooSmall.apply(size.get() < threshold));
        // Piece name is kept exactly as it was, spelling included.
        final Block dropFlagged = Pieces.removeVertices("KeepAboveTresholdComponents", tooSmall.castToSupplier());

        return new SequenceBlock(computeComponents, flagSmallComponents, dropFlagged);
    }
}