com.pegasus.ResultInfo.java Source code

Java tutorial

Introduction

Here is the source code for com.pegasus.ResultInfo.java

Source

/***********************************************************************
PEGASUS: Peta-Scale Graph Mining System
Authors: U Kang, Duen Horng Chau, and Christos Faloutsos
    
This software is licensed under Apache License, Version 2.0 (the  "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    
http://www.apache.org/licenses/LICENSE-2.0
    
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-------------------------------------------------------------------------
File: ConCmpt.java
 - HCC: Find Connected Components of graph
Version: 2.0
***********************************************************************/

package com.pegasus;

import java.io.*;
import java.util.*;
import java.text.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

class ResultInfo {
    public int changed;
    public int unchanged;
};

public class ConCmpt extends Configured implements Tool {
    public static int MAX_ITERATIONS = 2048;
    public static int changed_nodes[] = new int[MAX_ITERATIONS];
    public static int unchanged_nodes[] = new int[MAX_ITERATIONS];
    static int iter_counter = 0;

    //////////////////////////////////////////////////////////////////////
    // STAGE 1: join matrix elements and vector elements using matrix.dst_id and vector.row_id
    //////////////////////////////////////////////////////////////////////
    public static class MapStage1 extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
        private final Text from_node = new Text();
        private final Text to_node = new Text();
        int make_symmetric = 0;

        public void configure(JobConf job) {
            make_symmetric = Integer.parseInt(job.get("make_symmetric"));

            System.out.println("MapStage1 : make_symmetric = " + make_symmetric);
        }

        public void map(final LongWritable key, final Text value, final OutputCollector<Text, Text> output,
                final Reporter reporter) throws IOException {
            String line_text = value.toString();
            if (line_text.startsWith("#")) // ignore comments in the edge file
                return;

            final String[] line = line_text.split("\t");
            if (line.length < 2) {
                return;
            }

            if (line[1].startsWith("m")) { // input sample: 11   msu5
                from_node.set(line[0]);
                output.collect(from_node, new Text(line[1]));
            } else { // (src, dst) edge
                to_node.set(line[1]);

                output.collect(to_node, new Text(line[0])); // invert dst and src

                if (make_symmetric == 1) { // make inverse egges
                    from_node.set(line[0]);

                    if (!((to_node.toString()).equals(from_node.toString())))
                        output.collect(from_node, new Text(line[1]));
                }
            }
        }
    }

    public static class RedStage1 extends MapReduceBase implements Reducer<Text, Text, Text, Text> {

        public void configure(JobConf job) {
        }

        public void reduce(final Text key, final Iterator<Text> values, OutputCollector<Text, Text> output,
                final Reporter reporter) throws IOException {
            String component_id_str = "";
            Set<String> from_nodes_set = new HashSet<String>();
            boolean self_contained = false;
            String line = "";

            while (values.hasNext()) {
                Text from_cur_node = values.next();
                line = from_cur_node.toString();

                if (line.startsWith("m")) { // component info
                    if (component_id_str.length() == 0)
                        component_id_str = line.substring(3);
                } else { // edge line
                    String from_node = line;
                    from_nodes_set.add(from_node);
                    if ((key.toString()).equals(from_node))
                        self_contained = true;
                }
            }

            if (self_contained == false) // add self loop, if not exists.
                from_nodes_set.add(key.toString());

            Iterator from_nodes_it = from_nodes_set.iterator();
            while (from_nodes_it.hasNext()) {
                String component_info;
                String cur_key = (String) from_nodes_it.next();

                if (cur_key.equals(key.toString())) {
                    component_info = "msi" + component_id_str;
                    output.collect(new Text(cur_key), new Text(component_info));
                } else {
                    component_info = "moi" + component_id_str;
                    output.collect(new Text(cur_key), new Text(component_info));
                }
            }
        }
    }

    ////////////////////////////////////////////////////////////////////////////////////////////////
    // STAGE 2: merge partial component ids.
    //  - Input: partial component ids
    //  - Output: combined component ids
    ////////////////////////////////////////////////////////////////////////////////////////////////
    public static class MapStage2 extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
        // Identity mapper
        public void map(final LongWritable key, final Text value, final OutputCollector<Text, Text> output,
                final Reporter reporter) throws IOException {
            final String[] line = value.toString().split("\t");

            output.collect(new Text(line[0]), new Text(line[1]));
        }
    }

    public static class RedStage2 extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
        public void reduce(final Text key, final Iterator<Text> values, final OutputCollector<Text, Text> output,
                final Reporter reporter) throws IOException {
            //int i;
            String out_val = "ms";
            boolean bSelfChanged = false;
            char changed_prefix = 'x';
            //String complete_cistring = "";
            String cur_min_nodeid = "";
            String self_min_nodeid = "";

            while (values.hasNext()) {
                String cur_ci_string = values.next().toString();
                String cur_nodeid = "";
                try {
                    cur_nodeid = cur_ci_string.substring(3);
                } catch (Exception ex) {
                    System.out.println("Exception! cur_ci_string=[" + cur_ci_string + "]");
                }

                if (cur_ci_string.charAt(1) == 's') { // for calculating individual diameter
                    self_min_nodeid = cur_nodeid;
                }

                if (cur_min_nodeid.equals("")) {
                    cur_min_nodeid = cur_nodeid;
                } else {
                    if (cur_nodeid.compareTo(cur_min_nodeid) < 0)
                        cur_min_nodeid = cur_nodeid;
                }
            }

            if (self_min_nodeid.equals(cur_min_nodeid)) {
                changed_prefix = 'f'; // unchanged
            } else
                changed_prefix = 'i'; // changed

            out_val = out_val + changed_prefix + cur_min_nodeid;

            output.collect(key, new Text(out_val));
        }
    }

    public static class CombinerStage2 extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
        public void reduce(final Text key, final Iterator<Text> values, final OutputCollector<Text, Text> output,
                final Reporter reporter) throws IOException {
            String out_val = "moi";
            String cur_min_nodeid = "";

            while (values.hasNext()) {
                Text cur_value_text = values.next();
                String cur_ci_string = cur_value_text.toString();
                String cur_nodeid = "";
                try {
                    cur_nodeid = cur_ci_string.substring(3);
                } catch (Exception ex) {
                    System.out.println("Exception! cur_ci_string=[" + cur_ci_string + "]");
                }

                if (cur_ci_string.charAt(1) == 's') { // for calculating individual diameter
                    output.collect(key, new Text(cur_value_text));
                    continue;
                }

                if (cur_min_nodeid.equals("")) {
                    cur_min_nodeid = cur_nodeid;
                } else {
                    if (cur_nodeid.compareTo(cur_min_nodeid) < 0)
                        cur_min_nodeid = cur_nodeid;
                }
            }

            if (!cur_min_nodeid.equals("")) {
                out_val += cur_min_nodeid;

                output.collect(key, new Text(out_val));
            }
        }
    }

    //////////////////////////////////////////////////////////////////////
    // STAGE 3: Calculate number of nodes whose component id changed/unchanged.
    //  - Input: current component ids
    //  - Output: number_of_changed_nodes
    //////////////////////////////////////////////////////////////////////
    public static class MapStage3 extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
        // output : f n      ( n : # of node whose component didn't change)
        //          i m      ( m : # of node whose component changed)
        public void map(final LongWritable key, final Text value, final OutputCollector<Text, Text> output,
                final Reporter reporter) throws IOException {
            if (value.toString().startsWith("#"))
                return;

            final String[] line = value.toString().split("\t");
            char change_prefix = line[1].charAt(2);

            output.collect(new Text(Character.toString(change_prefix)), new Text(Integer.toString(1)));
        }
    }

    public static class RedStage3 extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
        public void reduce(final Text key, final Iterator<Text> values, final OutputCollector<Text, Text> output,
                final Reporter reporter) throws IOException {
            int sum = 0;

            while (values.hasNext()) {
                final String line = values.next().toString();
                int cur_value = Integer.parseInt(line);

                sum += cur_value;
            }

            output.collect(key, new Text(Integer.toString(sum)));
        }
    }

    //////////////////////////////////////////////////////////////////////
    // command line interface
    //////////////////////////////////////////////////////////////////////
    protected Path edge_path = null;
    protected Path curbm_path = null;
    protected Path tempbm_path = null;
    protected Path nextbm_path = null;
    protected Path output_path = null;
    protected Path all_vertices = null;
    protected Path grapherOut_path = null;
    protected int nreducers = 1;
    protected int cur_iter = 1;
    protected int make_symmetric = 0; // convert directed graph to undirected graph
    protected String local_output_path;

    // Main entry point.
    public static void main(final String[] args) throws Exception {
        final int result = ToolRunner.run(new Configuration(), new ConCmpt(), args);

        System.exit(result);
    }

    // submit the map/reduce job.
    public int run(final String[] args) throws Exception {

        Configuration conf = getConf();
        final FileSystem fs = FileSystem.get(conf);
        edge_path = new Path(conf.get("edge_path"));
        all_vertices = new Path(conf.get("all_vertices"));
        curbm_path = new Path(conf.get("iteration_state"));
        tempbm_path = new Path(conf.get("stage1out"));
        nextbm_path = new Path(conf.get("stage2out"));
        output_path = new Path(conf.get("stage3out"));
        grapherOut_path = new Path(conf.get("grapherout"));
        nreducers = Integer.parseInt(conf.get("num_reducers"));
        local_output_path = conf.get("local_output");

        // initital cleanup
        fs.delete(tempbm_path, true);
        fs.delete(nextbm_path, true);
        fs.delete(output_path, true);
        fs.delete(curbm_path, true);
        fs.delete(grapherOut_path, true);
        FileUtil.fullyDelete(new File(local_output_path));
        fs.mkdirs(curbm_path);
        //fs.mkdirs(grapherOut_path);

        FileStatus[] statusArray = fs.listStatus(all_vertices);
        for (int index = 0; index < statusArray.length; index++) {
            Path temp = statusArray[index].getPath();
            FileUtil.copy(fs, temp, fs, curbm_path, false, conf);
        }

        make_symmetric = 1;

        System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n");

        // Iteratively calculate neighborhood function. 
        // rotate directory
        for (int i = cur_iter; i < MAX_ITERATIONS; i++) {
            cur_iter++;

            System.out.println("configStage1");
            JobClient.runJob(configStage1());
            System.out.println("configStage2");
            JobClient.runJob(configStage2());
            System.out.println("configStage3");
            JobClient.runJob(configStage3());

            FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

            // copy neighborhood information from HDFS to local disk, and read it!
            String new_path = local_output_path + "/" + i;
            fs.copyToLocalFile(output_path, new Path(new_path));
            ResultInfo ri = readIterationOutput(new_path);

            changed_nodes[iter_counter] = ri.changed;
            changed_nodes[iter_counter] = ri.unchanged;

            iter_counter++;

            System.out.println("Hop " + i + " : changed = " + ri.changed + ", unchanged = " + ri.unchanged);
            fs.delete(curbm_path);
            fs.delete(tempbm_path);
            fs.delete(output_path);
            fs.rename(nextbm_path, curbm_path);

            // Stop when the minimum neighborhood doesn't change
            if (ri.changed == 0) {
                System.out.println("All the component ids converged. Finishing...");
                fs.rename(curbm_path, grapherOut_path);
                break;
            }
        }
        FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

        // finishing.
        System.out.println("\n[PEGASUS] Connected component computed.");
        System.out.println("[PEGASUS] Total Iteration = " + iter_counter);
        return 0;
    }

    // read neighborhood number after each iteration.
    public static ResultInfo readIterationOutput(String new_path) throws Exception {
        ResultInfo ri = new ResultInfo();
        ri.changed = ri.unchanged = 0;
        String output_path = new_path + "/part-00000";
        String file_line = "";

        try {
            BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(output_path), "UTF8"));

            // Read first line
            file_line = in.readLine();

            // Read through file one line at time. Print line # and line
            while (file_line != null) {
                final String[] line = file_line.split("\t");

                if (line[0].startsWith("i"))
                    ri.changed = Integer.parseInt(line[1]);
                else // line[0].startsWith("u")
                    ri.unchanged = Integer.parseInt(line[1]);

                file_line = in.readLine();
            }

            in.close();
        } catch (IOException e) {
            e.printStackTrace();
        }

        return ri;//result;
    }

    // Configure stage1
    protected JobConf configStage1() throws Exception {
        final JobConf conf = new JobConf(getConf(), ConCmpt.class);
        conf.set("cur_iter", "" + cur_iter);
        conf.set("make_symmetric", "" + make_symmetric);
        conf.setJobName("ConCmpt_Stage1");

        conf.setMapperClass(MapStage1.class);
        conf.setReducerClass(RedStage1.class);

        FileInputFormat.setInputPaths(conf, edge_path, curbm_path);
        FileOutputFormat.setOutputPath(conf, tempbm_path);

        conf.setNumReduceTasks(nreducers);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        return conf;
    }

    // Configure stage2
    protected JobConf configStage2() throws Exception {
        final JobConf conf = new JobConf(getConf(), ConCmpt.class);
        conf.set("cur_iter", "" + cur_iter);
        conf.set("make_symmetric", "" + make_symmetric);
        conf.setJobName("ConCmpt_Stage2");

        conf.setMapperClass(MapStage2.class);
        conf.setReducerClass(RedStage2.class);
        conf.setCombinerClass(CombinerStage2.class);

        FileInputFormat.setInputPaths(conf, tempbm_path);
        FileOutputFormat.setOutputPath(conf, nextbm_path);

        conf.setNumReduceTasks(nreducers);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        return conf;
    }

    // Configure stage3
    protected JobConf configStage3() throws Exception {
        final JobConf conf = new JobConf(getConf(), ConCmpt.class);
        conf.setJobName("ConCmpt_Stage3");

        conf.setMapperClass(MapStage3.class);
        conf.setReducerClass(RedStage3.class);
        conf.setCombinerClass(RedStage3.class);

        FileInputFormat.setInputPaths(conf, nextbm_path);
        FileOutputFormat.setOutputPath(conf, output_path);

        conf.setNumReduceTasks(1); // This is necessary.

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        return conf;
    }
}