org.apache.giraph.block_app.library.pagerank.PageRankTest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.giraph.block_app.library.pagerank.PageRankTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.giraph.block_app.library.pagerank;

import org.apache.giraph.block_app.framework.BlockUtils;
import org.apache.giraph.block_app.framework.api.local.LocalBlockRunner;
import org.apache.giraph.conf.GiraphConfiguration;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.edge.EdgeFactory;
import org.apache.giraph.graph.Vertex;
import org.apache.giraph.utils.InternalVertexRunner;
import org.apache.giraph.utils.TestGraph;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.junit.Assert;
import org.junit.Test;

import com.google.common.collect.Lists;

import java.util.AbstractMap;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

/**
 * Tests for the PageRank block application.
 *
 * <p>The weighted cases build small hand-written graphs with hard-coded
 * expected ranks and run them through {@link LocalBlockRunner}. The comparison
 * case checks that weighted PageRank with every edge weight equal to 1
 * produces the same ranks as unweighted PageRank on the same random graph.
 */
public class PageRankTest {
    private static final int NUMBER_OF_ITERATIONS = 50;
    private static final double PRECISION = 0.0000001;

    /**
     * Runs PageRank on the graph produced by {@code generator} and checks every
     * output vertex against the example's expected value.
     *
     * @param generator builds the graph and expected output for a fresh configuration
     * @throws Exception if the local run fails
     */
    public static void testComputation(ExampleGenerator generator) throws Exception {
        GiraphConfiguration conf = new GiraphConfiguration();
        PageRankSettings.ITERATIONS.set(conf, NUMBER_OF_ITERATIONS);
        BlockUtils.setAndInitBlockFactoryClass(conf, PageRankBlockFactory.class);

        final WeightedPageRankTestExample example = generator.generate(conf);

        LocalBlockRunner.runAppWithVertexOutput(example.graph, (vertex) -> {
            long id = vertex.getId().get();
            Double expected = example.expectedOutput.get(id);
            // Fail with a readable message instead of an unboxing NullPointerException.
            Assert.assertNotNull("No expected page rank for vertex " + id, expected);
            double received = vertex.getValue().get();
            Assert.assertEquals("Page rank of vertex " + id, expected.doubleValue(), received, PRECISION);
        });
    }

    @Test
    public void testCliqueWeightedVertex() throws Exception {
        testComputation(PageRankTest::createCliqueExample);
    }

    @Test
    public void testRingWeightedVertex() throws Exception {
        testComputation(PageRankTest::createRingExample);
    }

    @Test
    public void testOneVertexConnectedToAllWeightedVertex() throws Exception {
        testComputation(PageRankTest::createOneVertexConnectedToAllExample);
    }

    @Test
    public void testAllVerticesConnectedToOneWeightedVertex() throws Exception {
        testComputation(PageRankTest::createAllVerticesConnectedToOne);
    }

    @Test
    public void testSmallChainWeightedVertex() throws Exception {
        testComputation(PageRankTest::createSmallChainExample);
    }

    /**
     * Builds the same random graph twice, once with unit edge weights and once
     * without edge values, and checks that weighted and unweighted PageRank
     * agree on every vertex.
     */
    @Test
    public void compareWithUnweightedPageRank() throws Exception {
        int numVertices = 100;
        int maxEdges = 50;
        float dampingFactor = 0.85f;
        // The graph is random, but the seed is reported on failure so the run can be replayed.
        long seed = System.nanoTime();
        Random random = new Random(seed);

        GiraphConfiguration wprConf = createComparisonConf(true, dampingFactor);
        GiraphConfiguration prConf = createComparisonConf(false, dampingFactor);

        TestGraph<LongWritable, DoubleWritable, DoubleWritable> wprGraph = new TestGraph<>(wprConf);
        TestGraph<LongWritable, DoubleWritable, NullWritable> prGraph = new TestGraph<>(prConf);
        for (int i = 0; i < numVertices; i++) {
            int[] neighbors = new int[random.nextInt(maxEdges)];
            double[] edgeWeights = new double[neighbors.length];
            for (int j = 0; j < neighbors.length; j++) {
                neighbors[j] = random.nextInt(numVertices);
                edgeWeights[j] = 1.0;
            }
            prGraph.addVertex(new LongWritable(i), new DoubleWritable(1.0), createEdgesWeightless(neighbors));
            wprGraph.addVertex(new LongWritable(i), new DoubleWritable(1.0), createEdges(neighbors, edgeWeights));
        }

        wprGraph = InternalVertexRunner.runWithInMemoryOutput(wprConf, wprGraph);

        prGraph = InternalVertexRunner.runWithInMemoryOutput(prConf, prGraph);

        for (Vertex<LongWritable, DoubleWritable, DoubleWritable> wprVertex : wprGraph) {
            long id = wprVertex.getId().get();
            Vertex<LongWritable, DoubleWritable, NullWritable> prVertex = prGraph.getVertex(wprVertex.getId());
            Assert.assertNotNull("Vertex " + id + " missing from unweighted output (seed " + seed + ")", prVertex);
            Assert.assertEquals("Page rank of vertex " + id + " (seed " + seed + ")",
                    prVertex.getValue().get(), wprVertex.getValue().get(), PRECISION);
        }
    }

    /**
     * Creates a configuration for the weighted/unweighted comparison test.
     *
     * @param weighted      value for the weighted-PageRank setting
     * @param dampingFactor damping factor to configure
     * @return a new configuration with the PageRank block factory initialized
     */
    private static GiraphConfiguration createComparisonConf(boolean weighted, float dampingFactor) {
        GiraphConfiguration conf = new GiraphConfiguration();
        PageRankSettings.WEIGHTED_PAGERANK.set(conf, weighted);
        PageRankSettings.ITERATIONS.set(conf, NUMBER_OF_ITERATIONS);
        PageRankSettings.DAMPING_FACTOR.set(conf, dampingFactor);
        BlockUtils.setAndInitBlockFactoryClass(conf, PageRankBlockFactory.class);
        return conf;
    }

    /**
     * Creates an array of weighted edges from neighbors and edgeWeights.
     *
     * @param neighbors   target vertex ids
     * @param edgeWeights weight of the edge to the neighbor at the same index
     * @return one entry (target id, weight) per neighbor
     * @throws IllegalArgumentException if the two arrays differ in length
     */
    private static Map.Entry<LongWritable, DoubleWritable>[] createEdges(int[] neighbors, double[] edgeWeights) {
        if (neighbors.length != edgeWeights.length) {
            throw new IllegalArgumentException(
                    "neighbors (" + neighbors.length + ") and edgeWeights (" + edgeWeights.length
                            + ") must have the same length");
        }
        // Generic arrays cannot be created directly; only matching entries are stored below.
        @SuppressWarnings({ "unchecked", "rawtypes" })
        Map.Entry<LongWritable, DoubleWritable>[] edges = new Map.Entry[neighbors.length];
        for (int i = 0; i < neighbors.length; i++) {
            edges[i] = new AbstractMap.SimpleEntry<>(new LongWritable(neighbors[i]),
                    new DoubleWritable(edgeWeights[i]));
        }
        return edges;
    }

    /**
     * Creates an array of unweighted edges from neighbors.
     *
     * @param neighbors target vertex ids
     * @return one entry (target id, {@link NullWritable}) per neighbor
     */
    private static Map.Entry<LongWritable, NullWritable>[] createEdgesWeightless(int[] neighbors) {
        // Generic arrays cannot be created directly; only matching entries are stored below.
        @SuppressWarnings({ "unchecked", "rawtypes" })
        Map.Entry<LongWritable, NullWritable>[] edges = new Map.Entry[neighbors.length];
        for (int i = 0; i < neighbors.length; i++) {
            edges[i] = new AbstractMap.SimpleEntry<>(new LongWritable(neighbors[i]), NullWritable.get());
        }
        return edges;
    }

    /**
     * Helper class for data related to one test case for weighted page rank.
     */
    private static class WeightedPageRankTestExample {
        /** Input graph; assigned by the example factory methods. */
        TestGraph<LongWritable, DoubleWritable, DoubleWritable> graph;
        /** Expected page rank keyed by vertex id. */
        final Map<Long, Double> expectedOutput = new HashMap<>();
    }

    /**
     * Builds a test example for a given configuration. The generator may also
     * adjust settings on the configuration it receives.
     */
    @FunctionalInterface
    private interface ExampleGenerator {
        WeightedPageRankTestExample generate(GiraphConfiguration conf);
    }

    /**
     * Create test case when graph is a clique. All outgoing edges from one
     * vertex have the same weights, so they will be normalized to 1/n,
     * and all vertices should have page rank 1.
     */
    private static WeightedPageRankTestExample createCliqueExample(GiraphConfiguration conf) {
        WeightedPageRankTestExample example = new WeightedPageRankTestExample();
        example.graph = new TestGraph<>(conf);
        addVertex(1, new long[] { 2, 3, 4 }, new double[] { 1, 1, 1 }, example.graph);
        addVertex(2, new long[] { 1, 3, 4 }, new double[] { 2, 2, 2 }, example.graph);
        addVertex(3, new long[] { 1, 2, 4 }, new double[] { 0.1, 0.1, 0.1 }, example.graph);
        addVertex(4, new long[] { 1, 2, 3 }, new double[] { 5, 5, 5 }, example.graph);
        for (long id = 1; id <= 4; id++) {
            example.expectedOutput.put(id, 1.0);
        }
        return example;
    }

    /**
     * Creates a vertex with a default-constructed value and the given weighted
     * out-edges, and adds it to {@code graph}.
     *
     * @param id      vertex id
     * @param edges   target vertex ids of the out-edges
     * @param weights weight of the edge to the target at the same index
     * @param graph   graph to add the vertex to
     * @throws IllegalArgumentException if {@code edges} and {@code weights} differ in length
     */
    public static void addVertex(int id, long[] edges, double[] weights,
            TestGraph<LongWritable, DoubleWritable, DoubleWritable> graph) {
        Vertex<LongWritable, DoubleWritable, DoubleWritable> v = graph.getConf().createVertex();
        v.setConf(graph.getConf());
        v.initialize(new LongWritable(id), new DoubleWritable(), newEdges(edges, weights));
        graph.addVertex(v);
    }

    /**
     * Pairs each target id with its weight as a Giraph edge.
     *
     * @param ids     target vertex ids
     * @param weights weight of the edge to the target at the same index
     * @return the edges, in input order
     * @throws IllegalArgumentException if the two arrays differ in length
     */
    private static Iterable<Edge<LongWritable, DoubleWritable>> newEdges(long[] ids, double[] weights) {
        if (ids.length != weights.length) {
            throw new IllegalArgumentException(
                    "ids (" + ids.length + ") and weights (" + weights.length + ") must have the same length");
        }
        List<Edge<LongWritable, DoubleWritable>> edges = Lists.newArrayListWithCapacity(ids.length);
        for (int i = 0; i < ids.length; i++) {
            edges.add(EdgeFactory.create(new LongWritable(ids[i]), new DoubleWritable(weights[i])));
        }
        return edges;
    }

    /**
     * Create test case when graph is a simple cycle. All vertices have just
     * one outgoing edge, so their weights are all going to be normalized to 1,
     * and all vertices should have page rank 1.
     */
    private static WeightedPageRankTestExample createRingExample(GiraphConfiguration conf) {
        WeightedPageRankTestExample example = new WeightedPageRankTestExample();
        example.graph = new TestGraph<>(conf);
        addVertex(1, new long[] { 2 }, new double[] { 1 }, example.graph);
        addVertex(2, new long[] { 3 }, new double[] { 2 }, example.graph);
        addVertex(3, new long[] { 4 }, new double[] { 1 }, example.graph);
        addVertex(4, new long[] { 5 }, new double[] { 5 }, example.graph);
        addVertex(5, new long[] { 6 }, new double[] { 0.7 }, example.graph);
        addVertex(6, new long[] { 7 }, new double[] { 2 }, example.graph);
        addVertex(7, new long[] { 8 }, new double[] { 0.3 }, example.graph);
        addVertex(8, new long[] { 1 }, new double[] { 5 }, example.graph);

        for (long i = 1; i <= 8; i++) {
            example.expectedOutput.put(i, 1.0);
        }
        return example;
    }

    /**
     * Create test case when we have one vertex X which has outgoing edges to
     * all other vertices, and all other vertices have just one outgoing
     * edge to X.
     * Page rank of X should be (1 + d * (n - 1)) / (d + 1),
     * where d is damping factor and n total number of vertices.
     * Page rank of some other vertex Y should be 1 - d + d * pr(X) * y,
     * where y is normalized weight of edge X->Y.
     */
    private static WeightedPageRankTestExample createOneVertexConnectedToAllExample(GiraphConfiguration conf) {
        WeightedPageRankTestExample example = new WeightedPageRankTestExample();
        // Damping factor is set on conf before the graph is constructed from it.
        PageRankSettings.DAMPING_FACTOR.set(conf, 0.85f);
        example.graph = new TestGraph<>(conf);
        addVertex(1, new long[] { 2, 3, 4, 5 }, new double[] { 1, 2, 3, 4 }, example.graph);
        addVertex(2, new long[] { 1 }, new double[] { 2 }, example.graph);
        addVertex(3, new long[] { 1 }, new double[] { 0.1 }, example.graph);
        addVertex(4, new long[] { 1 }, new double[] { 5 }, example.graph);
        addVertex(5, new long[] { 1 }, new double[] { 5 }, example.graph);
        // these values are obtained from eigenvector calculation in numpy
        example.expectedOutput.put(1L, 2.37797072308);
        example.expectedOutput.put(2L, 0.35220291338);
        example.expectedOutput.put(3L, 0.55440585061);
        example.expectedOutput.put(4L, 0.75660878784);
        example.expectedOutput.put(5L, 0.95881172507);
        return example;
    }

    /**
     * Create test case when we have 4 vertices, A,B,C,D,
     * with edges in both directions between A and B, B and C, and C and D.
     * The damping factor is set to 0.9 and the expected page ranks are
     * hard-coded below.
     */
    private static WeightedPageRankTestExample createSmallChainExample(GiraphConfiguration conf) {
        WeightedPageRankTestExample example = new WeightedPageRankTestExample();
        // Damping factor is set on conf before the graph is constructed from it.
        PageRankSettings.DAMPING_FACTOR.set(conf, 0.9f);
        example.graph = new TestGraph<>(conf);
        addVertex(1, new long[] { 2 }, new double[] { 3 }, example.graph);
        addVertex(2, new long[] { 1, 3 }, new double[] { 3, 7 }, example.graph);
        addVertex(3, new long[] { 2, 4 }, new double[] { 4, 6 }, example.graph);
        addVertex(4, new long[] { 3 }, new double[] { 5 }, example.graph);
        // these values are obtained from eigenvector calculation in numpy
        example.expectedOutput.put(1L, 0.3762585);
        example.expectedOutput.put(2L, 1.0231795);
        example.expectedOutput.put(3L, 1.62374149);
        example.expectedOutput.put(4L, 0.97682040);
        return example;
    }

    /**
     * Create a test with 4 vertices, 3 vertices are connected to the first.
     * Tests the code in presence of sinks / dangling vertices.
     *
     * @param conf configuration to set the damping factor on and build the graph from
     * @return example
     */
    private static WeightedPageRankTestExample createAllVerticesConnectedToOne(GiraphConfiguration conf) {
        WeightedPageRankTestExample example = new WeightedPageRankTestExample();
        // Damping factor is set on conf before the graph is constructed from it.
        PageRankSettings.DAMPING_FACTOR.set(conf, 0.85f);
        example.graph = new TestGraph<>(conf);
        // Vertex 1 has no out-edges: it is the sink.
        addVertex(1, new long[] {}, new double[] {}, example.graph);
        addVertex(2, new long[] { 1 }, new double[] { 3 }, example.graph);
        addVertex(3, new long[] { 1 }, new double[] { 4 }, example.graph);
        addVertex(4, new long[] { 1 }, new double[] { 5 }, example.graph);
        // these values are obtained from eigenvector calculation in numpy
        example.expectedOutput.put(1L, 2.16793893);
        example.expectedOutput.put(2L, 0.61068702);
        example.expectedOutput.put(3L, 0.61068702);
        example.expectedOutput.put(4L, 0.61068702);
        return example;
    }
}