com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressReducer.java Source code

Java tutorial

Introduction

Here is the source code for com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressReducer.java

Source

/* Copyright (C) 2012 Intel Corporation.
 *     All rights reserved.
 *           
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * For more about this software visit:
 *      http://www.01.org/GraphBuilder 
 */
package com.intel.hadoop.graphbuilder.partition.mapreduce.edge;

import java.io.IOException;
import java.util.BitSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.log4j.Logger;

import com.intel.hadoop.graphbuilder.graph.Graph;
import com.intel.hadoop.graphbuilder.graph.GraphOutput;
import com.intel.hadoop.graphbuilder.graph.JsonVrecordFormatter;
import com.intel.hadoop.graphbuilder.graph.VertexRecord;
import com.intel.hadoop.graphbuilder.graph.VrecordFormatter;
import com.intel.hadoop.graphbuilder.graph.simplegraph.SimpleGraph;
import com.intel.hadoop.graphbuilder.graph.simplegraph.SimpleGraphOutput;
import com.intel.hadoop.graphbuilder.graph.simplegraph.SimpleJsonFormatter;
import com.intel.hadoop.graphbuilder.graph.simplegraph.SimpleSubGraph;
import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.CombinedEdgeValueType;
import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.CombinedVrecordValueType;
import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.IngressKeyType;
import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.IngressValueType;
import com.intel.hadoop.graphbuilder.types.Mergable;

/**
 * Reduce task of the edge ingress job. It has 2 subroutines: edges are reduced
 * into a {@code Graph} and vertices are reduced into a {@code VertexRecord}.
 * The reducer executes one of the 2 reduce methods based on the key flag:
 * <ul>
 * <li>For an {@code IngressKeyType.EDGEKEY} key, the reduce function has type:
 * {@code pid * List<EdgeTypeValue> -> Graph}.</li>
 * <li>For an {@code IngressKeyType.VERTEXKEY} key, the reduce function has
 * type: {@code vid * List<VertexRecordValue> -> VertexRecord}.</li>
 * </ul>
 * Keys carrying any other flag are logged as errors and skipped.
 * <p>
 * Output format and directory structures are determined by the choice of
 * {@code Graph} and its corresponding {@code GraphFormat} and
 * {@code GraphOutput}.
 * </p>
 * The current reducer uses {@code SimpleSubGraph}, {@code SimpleJsonFormatter},
 * and {@code SimpleGraphOutput}.
 * 
 * @see SimpleSubGraph
 * @see SimpleGraph
 * @see SimpleJsonFormatter
 * @see SimpleGraphOutput
 * @param <VidType>
 *            type of the vertex ids.
 * @param <VertexData>
 *            type of the data attached to a vertex.
 * @param <EdgeData>
 *            type of the data attached to an edge.
 * @param <KeyType>
 *            type of the reducer input key; it exposes a flag plus either a
 *            partition id or a vertex id.
 * @param <ValueType>
 *            type of the reducer input value; it exposes either a combined
 *            edge value or a combined vertex record value.
 */
public class EdgeIngressReducer<VidType extends WritableComparable<VidType>, VertexData extends Writable, EdgeData extends Writable, KeyType extends IngressKeyType<VidType>, ValueType extends IngressValueType<VidType, VertexData, EdgeData>>
        extends MapReduceBase implements Reducer<KeyType, ValueType, Text, Text> {

    private static final Logger LOG = Logger.getLogger(EdgeIngressReducer.class);

    /**
     * Reads the job parameters {@code numProcs} and
     * {@code subpartPerPartition} (both default to 1) and sets up the graph
     * output.
     *
     * @param job
     *            the job configuration.
     */
    @Override
    public void configure(JobConf job) {
        super.configure(job);
        this.numProcs = job.getInt("numProcs", 1);
        this.subpartPerPartition = job.getInt("subpartPerPartition", 1);
        // Switch to GLGraph by uncommenting the next line.
        // graphOutput = new GLGraphOutput(numProcs);
        graphOutput = new SimpleGraphOutput();
        graphOutput.configure(job);
    }

    /**
     * Dispatches on the key flag: edge keys are reduced into a graph that is
     * written through the {@code GraphOutput}; vertex keys are reduced into a
     * vertex record that is emitted under the output key {@code "vrecord"}.
     *
     * @param key
     *            the ingress key; its flag selects the subroutine.
     * @param iter
     *            the values grouped under {@code key}.
     * @param out
     *            the collector receiving the reducer output.
     * @param reporter
     *            the Hadoop progress reporter.
     * @throws IOException
     *             if reducing or writing fails. Failures of the edge
     *             subroutine are wrapped (cause preserved) rather than
     *             swallowed, so that the task does not report success after
     *             losing a partition.
     */
    @Override
    public void reduce(KeyType key, Iterator<ValueType> iter, OutputCollector<Text, Text> out, Reporter reporter)
            throws IOException {

        if (key.flag() == IngressKeyType.EDGEKEY) {
            try {
                reduceEdge(key.pid(), iter, out, reporter);
            } catch (IOException e) {
                LOG.error("Failed to reduce edges for graph: " + key.pid(), e);
                throw e;
            } catch (Exception e) {
                LOG.error("Failed to reduce edges for graph: " + key.pid(), e);
                throw new IOException("Failed to reduce edges for graph: " + key.pid(), e);
            }
        } else if (key.flag() == IngressKeyType.VERTEXKEY) {
            reduceVertex(key.vid(), iter, reporter);
            VrecordFormatter vformatter = new JsonVrecordFormatter();
            out.collect(new Text("vrecord"), new Text(vformatter.vrecordWriter(vrecord).toString()));
        } else {
            LOG.error("Unknown key type: " + key.flag());
        }
    }

    /**
     * Closes the graph output, if one was created by
     * {@link #configure(JobConf)}.
     *
     * @throws IOException
     *             if closing the graph output fails.
     */
    @Override
    public void close() throws IOException {
        if (graphOutput != null) {
            graphOutput.close();
        }
    }

    /**
     * Reduce a list of EdgeValues into a graph and output the graph.
     * <p>
     * The incoming {@code pid} is split into a partition id
     * ({@code pid / subpartPerPartition}) and a sub-partition id
     * ({@code pid % subpartPerPartition}) which are both set on the graph.
     * </p>
     * 
     * @param pid
     *            the id taken from the edge key.
     * @param iter
     *            the edge values to add to the graph.
     * @param out
     *            the collector handed to the graph output.
     * @param reporter
     *            the Hadoop progress reporter handed to the graph output.
     * @throws Exception
     *             if adding the edges or writing the graph fails.
     */
    @SuppressWarnings("unchecked")
    protected void reduceEdge(int pid, Iterator<ValueType> iter, OutputCollector<Text, Text> out, Reporter reporter)
            throws Exception {

        // Switch to GLGraph by uncommenting the next line.
        // myGraph = new GLGraph<VidType, VertexData, EdgeData>();
        // A typed local avoids the raw cast needed to reach setSubPid().
        SimpleSubGraph<VidType, VertexData, EdgeData> subGraph = new SimpleSubGraph<VidType, VertexData, EdgeData>();
        myGraph = subGraph;
        subGraph.setPid(pid / subpartPerPartition);
        subGraph.setSubPid(pid % subpartPerPartition);

        LOG.info("Reduce edges for graph: " + pid);
        while (iter.hasNext()) {
            ValueType val = iter.next();
            CombinedEdgeValueType evalue = val.edgeValue();
            myGraph.addEdges(evalue.sources(), evalue.targets(), evalue.edata());
        }

        // Switch to GLGraph by uncommenting the next line.
        // GLJsonFormatter formatter = new GLJsonFormatter();
        SimpleJsonFormatter formatter = new SimpleJsonFormatter();
        LOG.info("Write out graph " + pid + " with " + myGraph.numEdges() + " edges");
        graphOutput.writeAndClear(myGraph, formatter, out, reporter);

        LOG.info("Done reducing graph:" + pid + ".");
    }

    /**
     * Reduce a list of VertexRecordValues and the Vertex Data into a vertex
     * record, stored in {@link #vrecord}.
     * <p>
     * In/out edge counts are summed, the partition ids of all values are
     * collected as mirrors, and vertex data is merged when it implements
     * {@code Mergable} (otherwise the first vertex data seen is kept). The
     * owner is then picked at random: among the mirrors when there are any
     * (the owner is removed from the mirrors afterwards), otherwise among
     * {@code [0, numProcs)}.
     * </p>
     *
     * @param vid
     *            the id of the vertex being reduced.
     * @param iter
     *            the vertex record values grouped under {@code vid}.
     * @param reporter
     *            the Hadoop progress reporter (not used by this method).
     */
    protected void reduceVertex(VidType vid, Iterator<ValueType> iter, Reporter reporter) {
        vrecord = new VertexRecord<VidType, VertexData>(vid);

        BitSet mirrors = new BitSet(numProcs);
        int inEdges = 0;
        int outEdges = 0;

        while (iter.hasNext()) {
            ValueType val = iter.next();

            CombinedVrecordValueType vrecordValue = val.vrecordValue();
            inEdges += vrecordValue.inEdges();
            outEdges += vrecordValue.outEdges();
            HashSet<Short> pids = vrecordValue.pids();
            for (Short mirrorPid : pids) {
                mirrors.set(mirrorPid);
            }

            // merge vdata
            if (vrecordValue.hasVdata()) {
                if (vrecord.vdata() == null) {
                    vrecord.setVdata((VertexData) vrecordValue.vdata());
                } else if (vrecord.vdata() instanceof Mergable) {
                    ((Mergable) vrecord.vdata()).add(vrecordValue.vdata());
                }
            }
        }

        // Only the accumulated totals matter, so publish them once after the
        // loop instead of on every iteration.
        vrecord.setMirrors(mirrors);
        vrecord.setInEdges(inEdges);
        vrecord.setOutEdges(outEdges);

        // Set owner
        if (vrecord.numMirrors() == 0) {
            vrecord.setOwner((short) ownerPicker.nextInt(numProcs));
        } else {
            List<Short> mirrorsList = vrecord.mirrorList();
            vrecord.setOwner(mirrorsList.get(ownerPicker.nextInt(mirrorsList.size())));
            vrecord.removeMirror(vrecord.owner());
        }
    }

    /** Value of the {@code numProcs} job parameter. */
    int numProcs;
    /** Value of the {@code subpartPerPartition} job parameter. */
    int subpartPerPartition;
    /** Graph built by the most recent call to {@code reduceEdge}. */
    protected Graph<VidType, VertexData, EdgeData> myGraph;
    /** Vertex record built by the most recent call to {@code reduceVertex}. */
    protected VertexRecord<VidType, VertexData> vrecord;
    /** Sink the reduced graphs are written to; created in {@code configure}. */
    protected GraphOutput graphOutput;
    /** Shared random source for owner selection, reused across vertices. */
    private final Random ownerPicker = new Random();
}