com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressMapper.java Source code

Java tutorial

Introduction

Here is the source code for com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressMapper.java

Source

/* Copyright (C) 2012 Intel Corporation.
 *     All rights reserved.
 *           
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * For more about this software visit:
 *      http://www.01.org/GraphBuilder 
 */
package com.intel.hadoop.graphbuilder.partition.mapreduce.edge;

import java.io.IOException;
import java.util.Random;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.log4j.Logger;

import com.intel.hadoop.graphbuilder.graph.Edge;
import com.intel.hadoop.graphbuilder.graph.Vertex;
import com.intel.hadoop.graphbuilder.parser.FieldParser;
import com.intel.hadoop.graphbuilder.parser.GraphParser;
import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.IngressKeyType;
import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.IngressValueType;
import com.intel.hadoop.graphbuilder.partition.strategy.GreedyIngress;
import com.intel.hadoop.graphbuilder.partition.strategy.Ingress;
import com.intel.hadoop.graphbuilder.partition.strategy.RandomIngress;

/**
 * 
 * This mapper class maps edge and vertex list into intermediate key, value
 * pairs. Because mapper takes in edge and vertex at the same time, "!" is used
 * as a special character in the beginning of each vertex data to distinguish it
 * from edge data.
 * <p>
 * For each edge e = (u,v,edata) it assigns its partition using the ingress
 * algorithm defined by the {@code JobConf}. After computing the partition id,
 * it generates 3 records:
 * <ul>
 * <li>EdgeType value: {@code (pid,  (u,v,edata))}</li>
 * <li>VertexType value: {@code (u, ( pid}, 0, 1))}</li>
 * <li>VertexType value: {@code (v, ( pid}, 1, 0))}</li>
 * </ul>
 * For each vertex v = (v, vdata) it generates a VertexType value:
 * {@code (v, vdata)}.
 * </p>
 * <p>
 * All EdgeType value with the same partition id is reduced into a local graph
 * partition. And all VertexType value with the same vertex id is reduced into a
 * vertex record.
 * </p>
 * 
 * @param <VidType>
 * @param <VertexData>
 * @param <EdgeData>
 * @param <KeyType>
 * @param <ValueType>
 */
public class EdgeIngressMapper<VidType extends WritableComparable<VidType>, VertexData extends Writable, EdgeData extends Writable, KeyType extends IngressKeyType<VidType>, ValueType extends IngressValueType<VidType, VertexData, EdgeData>>
        extends MapReduceBase implements Mapper<LongWritable, Text, KeyType, ValueType> {

    private static final Logger LOG = Logger.getLogger(EdgeIngressMapper.class);

    @SuppressWarnings("unchecked")
    @Override
    public void configure(JobConf job) {
        super.configure(job);
        this.keyClass = job.getMapOutputKeyClass();
        this.valClass = job.getMapOutputValueClass();
        numprocs = job.getInt("numProcs", 1);
        overpartition = job.getInt("overpartition", 1);

        String ingressMethod = job.get("ingress");
        if (ingressMethod.equals("greedy")) {
            this.ingress = new GreedyIngress<VidType>(numprocs);
        } else {
            this.ingress = new RandomIngress<VidType>(numprocs);
        }

        try {
            this.graphparser = (GraphParser) Class.forName(job.get("GraphParser")).newInstance();
            this.vidparser = (FieldParser) Class.forName(job.get("VidParser")).newInstance();
            this.vdataparser = (FieldParser) Class.forName(job.get("VdataParser")).newInstance();
            this.edataparser = (FieldParser) Class.forName(job.get("EdataParser")).newInstance();
            this.mapKey = (KeyType) keyClass.newInstance();
            this.mapValue = (ValueType) valClass.newInstance();
        } catch (InstantiationException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    @Override
    public void map(LongWritable key, Text value, OutputCollector<KeyType, ValueType> out, Reporter reporter)
            throws IOException {

        String text = value.toString();
        if (text.startsWith("!")) {
            // Remove vertex escape character ! before parsing
            text = text.substring(1);
            if (graphparser.isVertexData(text))
                mapVertexInput(text, out, reporter);
        } else if (graphparser.isEdgeData(text)) {
            mapEdgeInput(text, out, reporter);
        } else {
            LOG.error("Fail to parse: " + value.toString());
        }
    }

    /**
     * Maps the edge input into an edge record and 2 vertex record.
     * 
     * @param value
     * @param out
     * @param reporter
     */
    protected void mapEdgeInput(String value, OutputCollector<KeyType, ValueType> out, Reporter reporter) {
        try {
            Edge<VidType, EdgeData> e = graphparser.parseEdge(value, vidparser, edataparser);
            short pid = ingress.computePid(e.source(), e.target());

            // overpartition edges and assign its quasi pid.
            Random r = new Random();
            short qid = (short) (overpartition * pid + r.nextInt(overpartition));
            mapKey.set(qid, null, IngressKeyType.EDGEKEY);
            mapValue.initEdgeValue(qid, e.source(), e.target(), e.EdgeData());
            out.collect(mapKey, mapValue);

            // output source vertex record
            mapKey.set(pid, e.source(), IngressKeyType.VERTEXKEY);
            mapValue.initVrecValue(e.source(), pid, 0, 1);
            out.collect(mapKey, mapValue);

            // output target vertex record
            mapKey.set(pid, e.target(), IngressKeyType.VERTEXKEY);
            mapValue.initVrecValue(e.target(), pid, 1, 0);
            out.collect(mapKey, mapValue);

        } catch (Exception e1) {
            e1.printStackTrace();
        }
    }

    /**
     * Maps the vertex input into a vertex value.
     * 
     * @param value
     * @param out
     * @param reporter
     */
    protected void mapVertexInput(String value, OutputCollector<KeyType, ValueType> out, Reporter reporter) {
        try {
            Vertex<VidType, VertexData> v = graphparser.parseVertex(value, vidparser, vdataparser);
            // output vertex value map
            short pid = -1;
            mapKey.set(pid, v.vid(), IngressKeyType.VERTEXKEY);
            mapValue.initVrecValue(v.vid(), v.vdata());
            out.collect(mapKey, mapValue);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    protected GraphParser<VidType, VertexData, EdgeData> graphparser;
    protected FieldParser<VidType> vidparser;
    protected FieldParser<VertexData> vdataparser;
    protected FieldParser<EdgeData> edataparser;

    protected KeyType mapKey;
    protected ValueType mapValue;
    protected Ingress<VidType> ingress;
    protected Class keyClass;
    protected Class valClass;
    protected int numprocs;
    protected int overpartition;
}