org.sf.xrime.algorithms.transform.vertex.AdjVertex2VertexSetTransformer.java Source code

Introduction

Here is the source code for org.sf.xrime.algorithms.transform.vertex.AdjVertex2VertexSetTransformer.java
Source

/*
 * Copyright (C) IBM Corp. 2009.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.sf.xrime.algorithms.transform.vertex;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.sf.xrime.ProcessorExecutionException;
import org.sf.xrime.Transformer;
import org.sf.xrime.model.edge.Edge;
import org.sf.xrime.model.vertex.AdjVertex;
import org.sf.xrime.model.vertex.Vertex;
import org.sf.xrime.model.vertex.VertexSet;

/**
 * <p>Transform the adjacent vertexes list of each vertex in the graph into a vertex set, which
 * covers the vertex and all its neighbours. We don't care whether the input adjacent vertexes
 * list is "incoming" or "outgoing". The resulting set will cover both of them. </p>
 * 
 * <p>In this way, we get a coverage of the graph, which consists of those vertex sets.</p>
 * @author xue
 */
public class AdjVertex2VertexSetTransformer extends Transformer {
    /**
     * Default constructor. Delegates to the no-argument superclass constructor.
     */
    public AdjVertex2VertexSetTransformer() {
        super();
    }

    /**
     * Constructor which forwards both paths to the superclass.
     * @param src passed to the superclass; presumably the input path later read via
     *            {@code srcPath} in {@link #execute()} (verify against Transformer)
     * @param dest passed to the superclass; presumably the output path later read via
     *            {@code destPath} in {@link #execute()} (verify against Transformer)
     */
    public AdjVertex2VertexSetTransformer(Path src, Path dest) {
        super(src, dest);
    }

    /**
     * Emit a vertex (key) and its surrounding neighbours (value).
     *
     * <p>For an input record (key vertex, adjacent vertexes list) the mapper emits:</p>
     * <ul>
     *   <li>one record for the key vertex whose value is the "forward" set, i.e. the key
     *       vertex plus both end points of every edge in the list;</li>
     *   <li>one record per edge end point other than the key vertex, whose value is the
     *       "backward" set containing only the key vertex, so that the end point learns
     *       about the key vertex as its neighbour.</li>
     * </ul>
     * @author xue
     */
    public static class MapClass extends MapReduceBase implements Mapper<Text, AdjVertex, Text, VertexSet> {

        /**
         * Process one vertex and its adjacent vertexes list.
         * @param key identifier of the vertex
         * @param value the adjacent vertexes list of the vertex
         * @param output collector receiving (vertex id, partial vertex set) pairs
         * @param reporter not used
         * @throws IOException if the collector fails to accept a record
         */
        @Override
        public void map(Text key, AdjVertex value, OutputCollector<Text, VertexSet> output, Reporter reporter)
                throws IOException {
            String keyId = key.toString();
            // The "forward" adjacent vertex set, seeded with the key vertex itself.
            VertexSet forward_set = new VertexSet();
            forward_set.addVertex(new Vertex(keyId));
            // The "backward" vertex set: just the key vertex. It is never modified after
            // this point, so the same instance is emitted for every end point.
            VertexSet backward_set = new VertexSet();
            backward_set.addVertex(new Vertex(keyId));
            // Get all edges.
            List<Edge> edges = value.getEdges();
            // Process each edge in the adjacent vertexes list. Both end points are handled
            // because the list may be either "incoming" or "outgoing".
            for (Edge edge : edges) {
                forward_set.addVertex(new Vertex(edge.getFrom()));
                // Emitting (key, {key}) would be redundant: the forward set emitted below
                // for the key already contains the key vertex. Skipping it only reduces the
                // amount of shuffled data; the merged result in the reducer is the same.
                if (!keyId.equals(edge.getFrom())) {
                    output.collect(new Text(edge.getFrom()), backward_set);
                }
                forward_set.addVertex(new Vertex(edge.getTo()));
                if (!keyId.equals(edge.getTo())) {
                    output.collect(new Text(edge.getTo()), backward_set);
                }
            }
            output.collect(key, forward_set);
        }
    }

    /**
     * Create the vertex set corresponding to each vertex and its neighbours. In other
     * words, merge neighbours (value) of a vertex (key) into a vertex set.
     *
     * <p>Input and output types are identical and the operation is a plain union of the
     * incoming sets, which is why {@link #execute()} also registers this class as the
     * combiner.</p>
     * @author xue
     */
    public static class ReduceClass extends MapReduceBase implements Reducer<Text, VertexSet, Text, VertexSet> {

        /**
         * Union all partial vertex sets collected for one vertex.
         * @param key identifier of the vertex
         * @param values partial vertex sets emitted for this vertex
         * @param output collector receiving the merged (vertex id, vertex set) pair
         * @param reporter not used
         * @throws IOException if the collector fails to accept the record
         */
        @Override
        public void reduce(Text key, Iterator<VertexSet> values, OutputCollector<Text, VertexSet> output,
                Reporter reporter) throws IOException {
            // The set used to accommodate all sub sets.
            VertexSet result_set = new VertexSet();
            // Merge all the sub sets.
            while (values.hasNext()) {
                VertexSet curr_set = values.next();
                for (Vertex curr_vert : curr_set.getVertexes()) {
                    // It should be ok to not clone the vertex.
                    // NOTE(review): this assumes the Vertex objects held by curr_set are not
                    // reused when the framework deserializes the next value -- confirm
                    // against VertexSet.readFields.
                    result_set.addVertex(curr_vert);
                }
            }
            // Output the result.
            output.collect(key, result_set);
        }
    }

    /**
     * Configure and synchronously run the map/reduce job which performs the transformation.
     * Input is read from {@code srcPath} and output written to {@code destPath}, both as
     * sequence files. The submitted job is recorded in {@code runningJob}.
     * @throws ProcessorExecutionException wrapping any {@link IOException} raised while
     *         running the job
     */
    @Override
    public void execute() throws ProcessorExecutionException {
        JobConf jobConf = new JobConf(conf, AdjVertex2VertexSetTransformer.class);
        jobConf.setJobName("AdjVertex2VertexSetTransformer");

        // the keys are vertex identifiers (strings)
        jobConf.setOutputKeyClass(Text.class);
        // the values are vertex sets (Writable)
        jobConf.setOutputValueClass(VertexSet.class);
        jobConf.setMapperClass(MapClass.class);
        // The reducer only unions sets, so it can double as the combiner.
        jobConf.setCombinerClass(ReduceClass.class);
        jobConf.setReducerClass(ReduceClass.class);
        // makes the file format suitable for machine processing.
        jobConf.setInputFormat(SequenceFileInputFormat.class);
        jobConf.setOutputFormat(SequenceFileOutputFormat.class);
        FileInputFormat.setInputPaths(jobConf, srcPath);
        FileOutputFormat.setOutputPath(jobConf, destPath);
        jobConf.setNumMapTasks(mapperNum);
        jobConf.setNumReduceTasks(reducerNum);
        // Compress the intermediate map output with gzip.
        jobConf.setCompressMapOutput(true);
        jobConf.setMapOutputCompressorClass(GzipCodec.class);

        try {
            this.runningJob = JobClient.runJob(jobConf);
        } catch (IOException e) {
            throw new ProcessorExecutionException(e);
        }
    }

    /**
     * Command line entry point. Runs the transformer through {@link ToolRunner} and
     * terminates the JVM with the code returned by the run, or with {@code 1} if the run
     * threw an exception, so that callers can detect the failure from the exit status.
     * @param args command line arguments handed to {@link ToolRunner}
     */
    public static void main(String[] args) {
        int res;
        try {
            res = ToolRunner.run(new AdjVertex2VertexSetTransformer(), args);
        } catch (Exception e) {
            e.printStackTrace();
            // Report the failure to the caller instead of ending with exit status 0.
            res = 1;
        }
        System.exit(res);
    }
}