Java tutorial
/* Copyright (C) 2012 Intel Corporation. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * For more about this software visit: * http://www.01.org/GraphBuilder */ package com.intel.hadoop.graphbuilder.partition.mapreduce.edge; import java.io.IOException; import java.util.BitSet; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Random; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; import org.apache.log4j.Logger; import com.intel.hadoop.graphbuilder.graph.Graph; import com.intel.hadoop.graphbuilder.graph.GraphOutput; import com.intel.hadoop.graphbuilder.graph.JsonVrecordFormatter; import com.intel.hadoop.graphbuilder.graph.VertexRecord; import com.intel.hadoop.graphbuilder.graph.VrecordFormatter; import com.intel.hadoop.graphbuilder.graph.simplegraph.SimpleGraph; import com.intel.hadoop.graphbuilder.graph.simplegraph.SimpleGraphOutput; import com.intel.hadoop.graphbuilder.graph.simplegraph.SimpleJsonFormatter; import com.intel.hadoop.graphbuilder.graph.simplegraph.SimpleSubGraph; import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.CombinedEdgeValueType; import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.CombinedVrecordValueType; import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.IngressKeyType; import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.IngressValueType; import com.intel.hadoop.graphbuilder.types.Mergable; /** * This reduce task has 2 subroutines: edges are reduced into a {@code Graph} * and vertices are reduced into {@code VertexRecord}. The reducer executes one * of the 2 reduce methods based on the key type: * <ul> * <li>For EdgeType key, the reduce function has type: pid * List<EdgeTypeValue> * -> {@code Graph}.</li> * <li>For non-EdgeType key, the reduce function has type: vid * * List<VertexRecordValue> -> {@code Graph}</li>. * </ul> * <p> * Output format and directory structures are determined by the choice of * {@code Graph} and its corresponding {@code GraphFormat} and * {@code GraphOutput}. * </p> * The current reducer uses {@code SimpleSubGraph}, {@code SimpleJsonFormatter}, * and {@code SimpleGraphOutput}. * * @see SimpleSubGraph * @see SimpleGraph * @see SimpleJSONFormatter * @see SimpleGraphOutput * @param <VidType> * @param <VertexData> * @param <EdgeData> * @param <KeyType> * @param <ValueType> */ public class EdgeIngressReducer<VidType extends WritableComparable<VidType>, VertexData extends Writable, EdgeData extends Writable, KeyType extends IngressKeyType<VidType>, ValueType extends IngressValueType<VidType, VertexData, EdgeData>> extends MapReduceBase implements Reducer<KeyType, ValueType, Text, Text> { private static final Logger LOG = Logger.getLogger(EdgeIngressReducer.class); @Override public void configure(JobConf job) { super.configure(job); this.numProcs = job.getInt("numProcs", 1); this.subpartPerPartition = job.getInt("subpartPerPartition", 1); // Switch to GLGraph by uncommenting the next line. // graphOutput = new GLGraphOutput(numProcs); graphOutput = new SimpleGraphOutput(); graphOutput.configure(job); } @Override public void reduce(KeyType key, Iterator<ValueType> iter, OutputCollector<Text, Text> out, Reporter reporter) throws IOException { if (key.flag() == IngressKeyType.EDGEKEY) { try { reduceEdge(key.pid(), iter, out, reporter); } catch (Exception e) { e.printStackTrace(); } } else if (key.flag() == IngressKeyType.VERTEXKEY) { reduceVertex(key.vid(), iter, reporter); VrecordFormatter vformatter = new JsonVrecordFormatter(); out.collect(new Text("vrecord"), new Text(vformatter.vrecordWriter(vrecord).toString())); } else { LOG.error("Unknown key type: " + key.flag()); } } @Override public void close() throws IOException { graphOutput.close(); } /** * Reduce a list of EdgeValues into a graph and output the graph. * * @param pid * @param iter * @param out * @param reporter * @throws Exception */ @SuppressWarnings("unchecked") protected void reduceEdge(int pid, Iterator<ValueType> iter, OutputCollector<Text, Text> out, Reporter reporter) throws Exception { // Switch to GLGraph by uncommenting the next line. // myGraph = new GLGraph<VidType, VertexData, EdgeData>(); myGraph = new SimpleSubGraph<VidType, VertexData, EdgeData>(); myGraph.setPid(pid / subpartPerPartition); ((SimpleSubGraph) myGraph).setSubPid(pid % subpartPerPartition); LOG.info("Reduce edges for graph: " + pid); while (iter.hasNext()) { ValueType val = iter.next(); CombinedEdgeValueType evalue = val.edgeValue(); myGraph.addEdges(evalue.sources(), evalue.targets(), evalue.edata()); } // Switch to GLGraph by uncommenting the next line. // GLJsonFormatter formatter = new GLJsonFormatter(); SimpleJsonFormatter formatter = new SimpleJsonFormatter(); LOG.info("Write out graph " + pid + " with " + myGraph.numEdges() + " edges"); graphOutput.writeAndClear(myGraph, formatter, out, reporter); LOG.info("Done reducing graph:" + pid + "."); } /** * Reduce a list of VertexRecordValues and the Vertex Data into a vertex * record. * * @param pid * @param iter * @param reporter */ protected void reduceVertex(VidType vid, Iterator<ValueType> iter, Reporter reporter) { vrecord = new VertexRecord<VidType, VertexData>(vid); BitSet mirrors = new BitSet(numProcs); int inEdges = 0; int outEdges = 0; while (iter.hasNext()) { ValueType val = iter.next(); CombinedVrecordValueType vrecordValue = val.vrecordValue(); inEdges += vrecordValue.inEdges(); outEdges += vrecordValue.outEdges(); HashSet<Short> pids = vrecordValue.pids(); Iterator<Short> piditer = pids.iterator(); while (piditer.hasNext()) { mirrors.set(piditer.next()); } vrecord.setMirrors(mirrors); vrecord.setInEdges(inEdges); vrecord.setOutEdges(outEdges); // merge vdata if (vrecordValue.hasVdata()) { if (vrecord.vdata() == null) vrecord.setVdata((VertexData) vrecordValue.vdata()); else if (vrecord.vdata() instanceof Mergable) { ((Mergable) vrecord.vdata()).add(vrecordValue.vdata()); } } } // Set owner Random generator = new Random(); if (vrecord.numMirrors() == 0) { vrecord.setOwner((short) generator.nextInt(numProcs)); } else { List<Short> mirrorsList = vrecord.mirrorList(); vrecord.setOwner(mirrorsList.get(generator.nextInt(mirrorsList.size()))); vrecord.removeMirror(vrecord.owner()); } } int numProcs, subpartPerPartition; protected Graph<VidType, VertexData, EdgeData> myGraph; protected VertexRecord<VidType, VertexData> vrecord; protected GraphOutput graphOutput; }