com.intel.hadoop.graphbuilder.preprocess.mapreduce.CreateGraphReducer.java Source code

Java tutorial

Introduction

Here is the source code for com.intel.hadoop.graphbuilder.preprocess.mapreduce.CreateGraphReducer.java

Source

/* Copyright (C) 2012 Intel Corporation.
 *     All rights reserved.
 *           
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * For more about this software visit:
 *      http://www.01.org/GraphBuilder 
 */
package com.intel.hadoop.graphbuilder.preprocess.mapreduce;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map.Entry;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

import com.intel.hadoop.graphbuilder.preprocess.functional.Functional;
import com.intel.hadoop.graphbuilder.preprocess.mapreduce.keyvalue.VertexEdgeUnionType;
import com.intel.hadoop.graphbuilder.util.Pair;

/**
 * Reducer that merges duplicate vertices and edges within one reduce group and
 * emits the surviving records as text.
 * <p>
 * Duplicates are combined with user defined {@code Functional}s, whose class
 * names are read from the {@code VertexFunc} and {@code EdgeFunc} job
 * properties. If no such {@code Functional} is provided, the first instance is
 * kept and later instances with the same identifier are discarded. Self edges
 * ({@code v -> v}) are always discarded. When the boolean job property
 * {@code noBidir} is {@code true}, an edge whose reverse edge is also present
 * in the same reduce group is not written, so both directions of such a pair
 * are dropped.
 * <p>
 * Vertex records are emitted under the key {@code vdata} and edge records
 * under the key {@code edata}. Output directory structure:
 * <ul>
 * <li>$outputdir/edata contains edge data output</li>
 * <li>$outputdir/vdata contains vertex data output</li>
 * </ul>
 */
public class CreateGraphReducer extends MapReduceBase
        implements Reducer<IntWritable, VertexEdgeUnionType, Text, Text> {

    /** Job counters incremented once per {@code reduce} call. */
    public static enum CREATE_GRAPH_COUNTER {
        NUM_VERTICES, NUM_EDGES
    }

    /**
     * Reads the reducer settings from the job configuration.
     * <p>
     * The optional {@code EdgeFunc} and {@code VertexFunc} properties name
     * {@code Functional} classes that are instantiated through their no-arg
     * constructor and then configured with the same job.
     *
     * @param job the job configuration
     * @throws IllegalStateException if a configured {@code Functional} class
     *         cannot be loaded, instantiated or configured. Continuing without
     *         it would silently fall back to "keep the first duplicate" and
     *         produce a different graph than the user asked for.
     */
    @Override
    public void configure(JobConf job) {
        super.configure(job);
        this.valClass = job.getMapOutputValueClass();
        this.noBidir = job.getBoolean("noBidir", false);
        // Only overwrite a field when its property is actually set.
        if (job.get("EdgeFunc") != null) {
            this.EdgeFunc = loadFunctional(job, "EdgeFunc");
        }
        if (job.get("VertexFunc") != null) {
            this.VertexFunc = loadFunctional(job, "VertexFunc");
        }
    }

    /**
     * Instantiates and configures the {@code Functional} named by a job
     * property.
     *
     * @param job      the job configuration, also passed to the new instance
     * @param property name of the property holding the class name; must be set
     * @return the configured instance
     * @throws IllegalStateException wrapping whatever prevented creation
     */
    private static Functional loadFunctional(JobConf job, String property) {
        String className = job.get(property);
        try {
            Functional func = (Functional) Class.forName(className).newInstance();
            func.configure(job);
            return func;
        } catch (Exception e) {
            // Broad catch on purpose: covers the reflective failures, a class
            // that is not a Functional, and anything configure() may throw.
            throw new IllegalStateException(
                    "Cannot create " + property + " functional from class '" + className + "'", e);
        }
    }

    /**
     * Collects all vertices and edges of one key group, merging duplicates,
     * then writes vertex records followed by edge records and updates the
     * {@link CREATE_GRAPH_COUNTER} counters with the number written.
     *
     * @param key      the reduce key; not used by this method
     * @param iter     the vertex/edge values grouped under {@code key}
     * @param out      collector receiving the {@code vdata}/{@code edata} records
     * @param reporter used to increment the vertex and edge counters
     * @throws IOException if the collector fails to write a record
     */
    @Override
    public void reduce(IntWritable key, Iterator<VertexEdgeUnionType> iter, OutputCollector<Text, Text> out,
            Reporter reporter) throws IOException {

        HashMap<Pair<Object, Object>, Writable> edgeset = new HashMap<Pair<Object, Object>, Writable>();
        HashMap<Object, Writable> vertexset = new HashMap<Object, Writable>();

        // NOTE(review): ids and data taken from each value are retained across
        // iterations. This assumes the value object does not recycle those
        // instances when the framework reuses it -- confirm against
        // VertexEdgeUnionType's deserialization.
        while (iter.hasNext()) {
            VertexEdgeUnionType next = iter.next();

            if (next.flag() == VertexEdgeUnionType.VERTEXVAL) {
                // Apply reduce on vertex
                Object vid = next.vertex().vid();
                if (vertexset.containsKey(vid)) {
                    // Duplicate vertex: merge if possible, otherwise keep the first.
                    if (VertexFunc != null) {
                        vertexset.put(vid, VertexFunc.reduce(next.vertex().vdata(), vertexset.get(vid)));
                    }
                } else if (VertexFunc != null) {
                    // First sighting is folded into the functional's base value.
                    vertexset.put(vid, VertexFunc.reduce(next.vertex().vdata(), VertexFunc.base()));
                } else {
                    vertexset.put(vid, next.vertex().vdata());
                }
            } else {
                // Apply reduce on edges, remove self and (or merge) duplicate edges.
                Pair<Object, Object> p = new Pair<Object, Object>(next.edge().source(), next.edge().target());

                // self edge
                if (p.getL().equals(p.getR())) {
                    continue;
                }

                if (edgeset.containsKey(p)) {
                    // Duplicate edge: merge if possible, otherwise keep the first.
                    if (EdgeFunc != null) {
                        edgeset.put(p, EdgeFunc.reduce(next.edge().EdgeData(), edgeset.get(p)));
                    }
                } else if (EdgeFunc != null) {
                    edgeset.put(p, EdgeFunc.reduce(next.edge().EdgeData(), EdgeFunc.base()));
                } else {
                    edgeset.put(p, next.edge().EdgeData());
                }
            }
        }

        // Output vertex records: "<vid>\t<vdata>"
        int nverts = 0;
        for (Entry<Object, Writable> e : vertexset.entrySet()) {
            out.collect(new Text("vdata"), new Text(e.getKey().toString() + "\t" + e.getValue().toString()));
            nverts++;
        }
        reporter.incrCounter(CREATE_GRAPH_COUNTER.NUM_VERTICES, nverts);

        // Output edge records: "<source>\t<target>\t<edata>"
        int nedges = 0;
        for (Entry<Pair<Object, Object>, Writable> e : edgeset.entrySet()) {
            // With noBidir set, an edge whose reverse is also present is
            // skipped; the reverse is skipped the same way when it is visited.
            if (noBidir && edgeset.containsKey(e.getKey().reverse())) {
                continue;
            }
            out.collect(new Text("edata"),
                    new Text(e.getKey().getL() + "\t" + e.getKey().getR() + "\t" + e.getValue().toString()));
            nedges++;
        }
        reporter.incrCounter(CREATE_GRAPH_COUNTER.NUM_EDGES, nedges);
    }

    /** Value of the {@code noBidir} job property; defaults to {@code false}. */
    protected boolean noBidir;
    /** Never assigned in this class. */
    protected Class keyClass;
    /** Map output value class of the job, captured in {@code configure}. */
    protected Class valClass;
    /** Merges duplicate edge data; {@code null} when {@code EdgeFunc} is not set. */
    protected Functional EdgeFunc;
    /** Merges duplicate vertex data; {@code null} when {@code VertexFunc} is not set. */
    protected Functional VertexFunc;
}