playground.johannes.socialnetworks.survey.ivt2009.graph.io.GraphBuilder.java Source code

Java tutorial

Introduction

Here is the source code for playground.johannes.socialnetworks.survey.ivt2009.graph.io.GraphBuilder.java

Source

/* *********************************************************************** *
 * project: org.matsim.*
 * GraphBuilder.java
 *                                                                         *
 * *********************************************************************** *
 *                                                                         *
 * copyright       : (C) 2010 by the members listed in the COPYING,        *
 *                   LICENSE and WARRANTY file.                            *
 * email           : info at matsim dot org                                *
 *                                                                         *
 * *********************************************************************** *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *   See also COPYING, LICENSE and WARRANTY file                           *
 *                                                                         *
 * *********************************************************************** */
package playground.johannes.socialnetworks.survey.ivt2009.graph.io;

import gnu.trove.TDoubleArrayList;
import gnu.trove.TDoubleDoubleHashMap;
import gnu.trove.TObjectIntHashMap;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.math.stat.StatUtils;
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
import org.apache.log4j.Logger;
import org.matsim.api.core.v01.Id;
import org.matsim.api.core.v01.Scenario;
import org.matsim.api.core.v01.population.Person;
import org.matsim.core.config.ConfigUtils;
import org.matsim.core.population.PersonImpl;
import org.matsim.core.scenario.ScenarioUtils;
import org.matsim.core.utils.collections.Tuple;

import playground.johannes.sna.gis.CRSUtils;
import playground.johannes.sna.graph.Vertex;
import playground.johannes.sna.math.Discretizer;
import playground.johannes.sna.math.FixedSampleSizeDiscretizer;
import playground.johannes.sna.math.Histogram;
import playground.johannes.sna.math.LinearDiscretizer;
import playground.johannes.sna.snowball.SampledGraphProjection;
import playground.johannes.sna.snowball.SampledVertexDecorator;
import playground.johannes.sna.util.TXTWriter;
import playground.johannes.socialnetworks.graph.social.SocialPerson;
import playground.johannes.socialnetworks.snowball2.io.SampledGraphProjMLWriter;
import playground.johannes.socialnetworks.snowball2.spatial.SpatialSampledGraphProjectionBuilder;
import playground.johannes.socialnetworks.statistics.Correlations;
import playground.johannes.socialnetworks.survey.ivt2009.graph.SocialSparseEdge;
import playground.johannes.socialnetworks.survey.ivt2009.graph.SocialSparseGraph;
import playground.johannes.socialnetworks.survey.ivt2009.graph.SocialSparseGraphBuilder;
import playground.johannes.socialnetworks.survey.ivt2009.graph.SocialSparseVertex;
import playground.johannes.socialnetworks.survey.ivt2009.graph.io.AlterTableReader.VertexRecord;

import com.vividsolutions.jts.geom.GeometryFactory;
import com.vividsolutions.jts.geom.Point;

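/**
 * Builds the social graph of the ivt2009 survey, together with its sampled (snowball) projection, from alter
 * tables, ego tables and SQL dumps, and augments it with edges derived from sociogram data.
 *
 * @author illenberger
 */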
public class GraphBuilder {

    /** Class-wide log4j logger. */
    public static final Logger logger = Logger.getLogger(GraphBuilder.class);

    /** Builder for the raw social graph; configured with CRS code 4326 (presumably EPSG:4326 / WGS 84 - TODO confirm). */
    private SocialSparseGraphBuilder builder = new SocialSparseGraphBuilder(CRSUtils.getCRS(4326));

    /** Builder for the sampled projection that decorates the raw graph. */
    private SpatialSampledGraphProjectionBuilder<SocialSparseGraph, SocialSparseVertex, SocialSparseEdge> projBuilder = new SpatialSampledGraphProjectionBuilder<SocialSparseGraph, SocialSparseVertex, SocialSparseEdge>();

    /** MATSim scenario created from a default config. NOTE(review): not referenced by any method visible in this file. */
    private Scenario scenario = ScenarioUtils.createScenario(ConfigUtils.createConfig());

    /** JTS geometry factory. NOTE(review): only referenced from commented-out code in this file. */
    private GeometryFactory geoFacotry = new GeometryFactory();

    /** Counts data-quality problems of the current build; re-created at the start of every buildGraph call. */
    private ErrorLogger errLogger;

    /** The raw social graph of the current build. */
    private SocialSparseGraph graph;

    /** Maps each raw vertex to its decorator in the sampled projection. */
    private Map<SocialSparseVertex, SampledVertexDecorator<SocialSparseVertex>> projMap;

    /** The sampled projection of {@link #graph}; this is what buildGraph returns. */
    private SampledGraphProjection<SocialSparseGraph, SocialSparseVertex, SocialSparseEdge> proj;

    /** Maps the id of a vertex record to the raw vertex created for it. */
    private Map<String, SocialSparseVertex> idMap;

    /**
     * Builds the raw social graph and its sampled projection from the given input files.
     * <p>
     * The method creates one vertex per record of the alter tables, then one edge per edge tuple of the alter
     * tables, and finally augments the graph with sociogram data. Data-quality problems (missing coordinates,
     * age, sex, duplicate edges) are counted and logged once at the end.
     *
     * @param alterTables paths of the alter table files
     * @param egoTables paths of the ego table files
     * @param sqlDumps paths of the SQL dump files
     * @return the sampled projection of the newly built graph
     * @throws IOException declared by this method; presumably raised by the readers while loading the input
     *         files - TODO confirm
     * @throws NumberFormatException if the id of an ego record is not a parsable integer
     */
    public SampledGraphProjection<SocialSparseGraph, SocialSparseVertex, SocialSparseEdge> buildGraph(
            List<String> alterTables, List<String> egoTables, List<String> sqlDumps) throws IOException {
        errLogger = new ErrorLogger();
        /*
         * Load raw data.
         */
        AlterTableReader alterReader = new AlterTableReader(alterTables);
        EgoTableReader egoReader = new EgoTableReader(egoTables);
        SQLDumpReader sqlReader = new SQLDumpReader(sqlDumps);
        /*
         * Build the raw graph and a sampled projection.
         */
        graph = builder.createGraph();
        proj = projBuilder.createGraph(graph);
        /*
         * Create the vertices.
         */
        projMap = new HashMap<SocialSparseVertex, SampledVertexDecorator<SocialSparseVertex>>();
        idMap = new HashMap<String, SocialSparseVertex>();

        for (Entry<String, VertexRecord> entry : alterReader.getVertices().entrySet()) {
            VertexRecord vRecord = entry.getValue();
            /*
             * Extract the home location.
             */
            Point point;
            if (vRecord.isEgo) {
                point = sqlReader.getEgoLocation(vRecord.egoSQLId);
                if (point == null) {
                    /*
                     * Fall back to the ego table reader (according to the log message this involves a
                     * request to a google server).
                     */
                    logger.info("Requesting google server for coordinates.");
                    point = egoReader.getEgoLocation(vRecord.id);
                }
            } else {
                point = sqlReader.getAlterLocation(vRecord.alterKeys);
            }
            if (point == null) {
                /*
                 * No coordinate available: count it and go on; the vertex is created with a null point.
                 */
                errLogger.logNoCoordinate(vRecord.isEgo);
            }
            /*
             * Create a vertex and its projection.
             */
            SocialSparseVertex vertex = builder.addVertex(graph, createPerson(vRecord, sqlReader), point);
            SampledVertexDecorator<SocialSparseVertex> vProj = projBuilder.addVertex(proj, vertex);
            /*
             * If it is an ego set the snowball attributes: the sampling iteration is inferred from the
             * numeric id, and the ego counts as detected one iteration earlier.
             */
            if (vRecord.isEgo) {
                vProj.sample(infereIterationSampled(Integer.valueOf(vRecord.id)));
                vProj.detect(vProj.getIterationSampled() - 1);
            }

            projMap.put(vertex, vProj);
            idMap.put(vRecord.id, vertex);
        }
        /*
         * Create the edges.
         */
        for (Tuple<VertexRecord, VertexRecord> edge : alterReader.getEdges()) {
            VertexRecord rec1 = edge.getFirst();
            VertexRecord rec2 = edge.getSecond();
            SocialSparseVertex v1 = idMap.get(rec1.id);
            SocialSparseVertex v2 = idMap.get(rec2.id);
            SocialSparseEdge socialEdge = builder.addEdge(graph, v1, v2);
            /*
             * A null edge is treated as a double edge and is only counted.
             */
            if (socialEdge == null) {
                errLogger.logDoubleEdge();
                continue;
            }

            SampledVertexDecorator<SocialSparseVertex> vProj1 = projMap.get(v1);
            SampledVertexDecorator<SocialSparseVertex> vProj2 = projMap.get(v2);

            projBuilder.addEdge(proj, vProj1, vProj2, socialEdge);
            /*
             * Set the snowball attributes of the endpoints that are not egos.
             */
            updateDetection(vProj1, vProj2);
            updateDetection(vProj2, vProj1);
            /*
             * Add edge attributes. The frequency is looked up with the SQL id of the ego endpoint and the
             * alter key the other endpoint has for that ego. If the first record is not an ego, the second
             * one is used as ego (assumes it is one - TODO confirm).
             */
            double freq;
            if (rec1.isEgo) {
                freq = sqlReader.getF2FFrequencey(rec1.egoSQLId, rec2.alterKeys.get(rec1.egoSQLId));
            } else {
                freq = sqlReader.getF2FFrequencey(rec2.egoSQLId, rec1.alterKeys.get(rec2.egoSQLId));
            }
            socialEdge.setFrequency(freq);

            socialEdge.setType(sqlReader.getEdgeType(rec1, rec2));
        }
        /*
         * Sociogram
         */
        loadSociogramData(alterReader.getVertices().values(), sqlReader);

        logger.info(errLogger.toString());
        return proj;
    }

    /**
     * Updates the detection iteration of a vertex that has not been sampled: the vertex is marked as detected
     * in the sampling iteration of the given neighbour, or in the earlier of that iteration and its current
     * detection iteration if it has been detected before. Sampled vertices are left untouched.
     */
    private void updateDetection(SampledVertexDecorator<SocialSparseVertex> vertex,
            SampledVertexDecorator<SocialSparseVertex> neighbour) {
        if (vertex.isSampled()) {
            return;
        }
        if (vertex.isDetected()) {
            vertex.detect(Math.min(vertex.getIterationDetected(), neighbour.getIterationSampled()));
        } else {
            vertex.detect(neighbour.getIterationSampled());
        }
    }

    /**
     * Creates the person attached to a vertex from the given record.
     * <p>
     * Age and sex are only set when available; a missing value is counted in the error logger instead.
     * Licence and car availability are only set for egos. Citizenship, education, income and civil status
     * are set for every record.
     *
     * @param record the vertex record describing the person
     * @param sqlData the SQL dump reader the attributes are queried from
     * @return the newly created social person wrapping a MATSim person
     */
    private SocialPerson createPerson(VertexRecord record, SQLDumpReader sqlData) {
        PersonImpl delegate = new PersonImpl(Id.create(record.id, Person.class));
        SocialPerson socialPerson = new SocialPerson(delegate);

        // A negative age marks a missing value.
        int years = record.isEgo ? sqlData.getEgoAge(record.egoSQLId) : sqlData.getAlterAge(record.alterKeys);
        if (years >= 0) {
            delegate.setAge(years);
        } else {
            errLogger.logNoAge(record.isEgo);
        }

        String gender = sqlData.getSex(record);
        if (gender == null) {
            errLogger.logNoSex(record.isEgo);
        } else {
            delegate.setSex(gender);
        }

        // Mobility attributes are only queried for egos.
        if (record.isEgo) {
            delegate.setLicence(sqlData.getLicense(record));
            delegate.setCarAvail(sqlData.getCarAvail(record));
        }

        socialPerson.setCitizenship(sqlData.getCitizenship(record));
        socialPerson.setEducation(sqlData.getEducation(record));
        socialPerson.setIncome(sqlData.getIncome(record));
        socialPerson.setCivilStatus(sqlData.getCivilStatus(record));
        return socialPerson;
    }

    /**
     * Infers the snowball iteration in which an ego has been sampled from its numeric id.
     * <p>
     * Ids in [0, 1000] map to iteration 0, (1000, 10000] to 1, (10000, 100000] to 2, (100000, 400000] to 3
     * and everything above 400000 to 4.
     *
     * @param id the numeric id of the ego
     * @return the sampling iteration, or {@code null} (after logging a warning) if the id is negative
     */
    private Integer infereIterationSampled(Integer id) {
        final int value = id;

        if (value < 0) {
            logger.warn(String.format("Cannot infere sampling iteration (%1$s)", id));
            return null;
        }

        // Ascending upper bounds; the first bound that is not exceeded decides the iteration.
        if (value <= 1000) {
            return 0;
        }
        if (value <= 10000) {
            return 1;
        }
        if (value <= 100000) {
            return 2;
        }
        if (value <= 400000) {
            return 3;
        }
        return 4;
    }

    /**
     * Augments the graph with edges derived from the sociogram (clique) data of the egos and writes several
     * statistics about the cliques to text files.
     * <p>
     * For every ego record the cliques are fetched from the SQL data. Within each clique, every pair of
     * members that are both not sampled is connected with probability 0.62. The inserted edges are added to
     * the raw graph and to the projection. Afterwards histograms and correlations of clique count, clique
     * size and vertex degree are written to a hard-coded output directory. A failure while writing is logged
     * and does not abort the method.
     * <p>
     * NOTE(review): edge insertion uses the unseeded {@link Math#random()}, so the resulting graph differs
     * between runs.
     *
     * @param records all vertex records; only those flagged as ego are evaluated
     * @param sqlData the SQL dump reader providing the cliques and the alter key mapping
     */
    private void loadSociogramData(Collection<VertexRecord> records, SQLDumpReader sqlData) {
        logger.info("Loading sociogram data...");
        Map<String, VertexRecord> map = sqlData.getFullAlterKeyMappping(records);

        /*
         * Remember the degree of every projected vertex before any sociogram edge is inserted.
         */
        TObjectIntHashMap<Vertex> rawDegrees = new TObjectIntHashMap<Vertex>();
        for (Vertex v : proj.getVertices()) {
            rawDegrees.put(v, v.getNeighbours().size());
        }

        int edgecnt = 0;
        int doublecnt = 0;
        int egoEdge = 0;

        Set<Vertex> notOkVertices = new HashSet<Vertex>();
        Set<Vertex> okVertices = new HashSet<Vertex>();
        DescriptiveStatistics notOkStats = new DescriptiveStatistics();
        DescriptiveStatistics okStats = new DescriptiveStatistics();

        DescriptiveStatistics numDistr = new DescriptiveStatistics();
        DescriptiveStatistics numDistrNoZero = new DescriptiveStatistics();
        DescriptiveStatistics sizeDistr = new DescriptiveStatistics();

        TDoubleArrayList sizeValues = new TDoubleArrayList();
        TDoubleArrayList kSizeValues = new TDoubleArrayList();
        TDoubleArrayList numValues = new TDoubleArrayList();
        TDoubleArrayList numValues2 = new TDoubleArrayList();
        TDoubleArrayList kNumValues = new TDoubleArrayList();

        for (VertexRecord record : records) {
            if (record.isEgo) {
                List<Set<String>> cliques = sqlData.getCliques(record);
                numDistr.addValue(cliques.size());

                Vertex v = idMap.get(record.id);
                numValues.add(cliques.size());
                kNumValues.add(v.getNeighbours().size());

                if (!cliques.isEmpty())
                    numDistrNoZero.addValue(cliques.size());

                for (Set<String> clique : cliques) {
                    sizeDistr.addValue(clique.size());
                    sizeValues.add(clique.size());
                    kSizeValues.add(rawDegrees.get(projMap.get(v)));
                    numValues2.add(cliques.size());
                    /*
                     * Resolve the clique members to vertices; the mapping key is the SQL id of the ego
                     * concatenated with the alter key.
                     */
                    List<SocialSparseVertex> vertices = new ArrayList<SocialSparseVertex>(clique.size());
                    for (String alter : clique) {
                        VertexRecord r = map.get(record.egoSQLId + alter);
                        if (r != null) {
                            SocialSparseVertex vertex = idMap.get(r.id);
                            if (vertex != null) {
                                vertices.add(vertex);
                            } else {
                                logger.warn("Vertex not found.");
                            }
                        } else {
                            logger.warn("Record not found.");
                        }
                    }

                    /*
                     * Visit every unordered pair of clique members.
                     */
                    for (int i = 0; i < vertices.size(); i++) {
                        for (int j = i + 1; j < vertices.size(); j++) {
                            SampledVertexDecorator<SocialSparseVertex> vProj1 = projMap.get(vertices.get(i));
                            SampledVertexDecorator<SocialSparseVertex> vProj2 = projMap.get(vertices.get(j));
                            if (!vProj1.isSampled() && !vProj2.isSampled()) {

                                if (Math.random() < 0.62) {
                                    SocialSparseEdge socialEdge = builder.addEdge(graph, vertices.get(i),
                                            vertices.get(j));
                                    if (socialEdge != null) {
                                        projBuilder.addEdge(proj, vProj1, vProj2, socialEdge);
                                        edgecnt++;

                                        /*
                                         * NOTE(review): both vertices were checked to be not sampled
                                         * above, so this branch looks unreachable unless adding an edge
                                         * changes the sampled state - confirm.
                                         */
                                        if (vProj1.isSampled() || vProj2.isSampled()) {
                                            egoEdge++;
                                            if (vProj1.isSampled())
                                                notOkVertices.add(vProj1);
                                            else
                                                notOkVertices.add(vProj2);
                                        }

                                    } else {
                                        doublecnt++;
                                        /*
                                         * NOTE(review): same as above, neither condition appears able
                                         * to hold here - confirm.
                                         */
                                        if (vProj1.isSampled())
                                            okVertices.add(vProj1);
                                        else if (vProj2.isSampled())
                                            okVertices.add(vProj2);
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        for (Vertex v : okVertices)
            okStats.addValue(rawDegrees.get(v));

        for (Vertex v : notOkVertices)
            notOkStats.addValue(rawDegrees.get(v));

        /*
         * All statistics go to this hard-coded directory.
         */
        final String outDir = "/Users/jillenberger/Work/socialnets/data/ivt2009/11-2011/augmented/";
        try {
            TDoubleDoubleHashMap hist = Histogram.createHistogram(okStats, new LinearDiscretizer(1), false);
            TXTWriter.writeMap(hist, "k", "n", outDir + "k_ok.txt");

            TDoubleDoubleHashMap hist2 = Histogram.createHistogram(notOkStats, new LinearDiscretizer(1), false);
            TXTWriter.writeMap(hist2, "k", "n", outDir + "k_notok.txt");

            /*
             * Share of "not ok" vertices per degree, evaluated for the degrees present in the "ok" histogram.
             */
            TDoubleDoubleHashMap ratio = new TDoubleDoubleHashMap();
            double[] keys = hist.keys();
            for (double k : keys) {
                double val1 = hist2.get(k);
                double val2 = hist.get(k);

                ratio.put(k, val1 / (val2 + val1));
            }
            TXTWriter.writeMap(ratio, "k", "p", outDir + "k_ratio.txt");

            logger.info("Mean num of cliques: " + numDistrNoZero.getMean());
            logger.info("Mean size: " + sizeDistr.getMean());
            logger.info("Median num of cliques: " + StatUtils.percentile(numDistrNoZero.getValues(), 50));
            logger.info("Median size: " + StatUtils.percentile(sizeDistr.getValues(), 50));

            TDoubleDoubleHashMap histNum = Histogram.createHistogram(numDistrNoZero,
                    FixedSampleSizeDiscretizer.create(numDistrNoZero.getValues(), 2, 20), true);
            Histogram.normalize(histNum);
            TXTWriter.writeMap(histNum, "num", "freq", outDir + "numCliques.txt");

            TDoubleDoubleHashMap histSize = Histogram.createHistogram(sizeDistr,
                    FixedSampleSizeDiscretizer.create(sizeDistr.getValues(), 2, 20), true);
            Histogram.normalize(histSize);
            TXTWriter.writeMap(histSize, "size", "freq", outDir + "numPersons.txt");

            /*
             * Mean clique size over the (discretized) degree of the ego.
             */
            Discretizer discretizer = FixedSampleSizeDiscretizer.create(kSizeValues.toNativeArray(), 20, 20);
            TDoubleArrayList valuesX = new TDoubleArrayList();
            for (int i = 0; i < kSizeValues.size(); i++) {
                valuesX.add(discretizer.discretize(kSizeValues.get(i)));
            }

            Correlations.writeToFile(Correlations.mean(valuesX.toNativeArray(), sizeValues.toNativeArray()),
                    outDir + "size_k.txt", "k", "size");

            /*
             * Mean number of cliques over the (discretized) degree of the ego.
             */
            discretizer = FixedSampleSizeDiscretizer.create(kNumValues.toNativeArray(), 20, 20);
            valuesX = new TDoubleArrayList();
            for (int i = 0; i < kNumValues.size(); i++) {
                valuesX.add(discretizer.discretize(kNumValues.get(i)));
            }

            Correlations.writeToFile(Correlations.mean(valuesX.toNativeArray(), numValues.toNativeArray()),
                    outDir + "num_k.txt", "k", "n");

            /*
             * Mean clique size over the number of cliques of the ego.
             */
            Correlations.writeToFile(Correlations.mean(numValues2.toNativeArray(), sizeValues.toNativeArray()),
                    outDir + "size_num.txt", "num", "size");
        } catch (IOException e) {
            /*
             * Also covers FileNotFoundException. The statistics are a by-product, so the failure is
             * reported through the logger and the graph construction goes on.
             */
            logger.error("Failed to write sociogram statistics.", e);
        }
        logger.info(String.format("Inserted %1$s edges, %2$s edges already present.", edgecnt, doublecnt));
        logger.info(String.format("Inserted %1$s edges between at least one ego.", egoEdge));
    }

    private class ErrorLogger {

        private int noEgoCoords;

        private int noAlterCoords;

        private int noEgoAge;

        private int noAlterAge;

        private int doubleEdges;

        private int noEgoSex;

        private int noAlterSex;

        public void logNoCoordinate(boolean isEgo) {
            if (isEgo)
                noEgoCoords++;
            else
                noAlterCoords++;
        }

        public void logDoubleEdge() {
            doubleEdges++;
        }

        public void logNoAge(boolean isEgo) {
            if (isEgo)
                noEgoAge++;
            else
                noAlterAge++;
        }

        public void logNoSex(boolean isEgo) {
            if (isEgo)
                noEgoSex++;
            else
                noAlterSex++;
        }

        @Override
        public String toString() {
            StringBuilder builder = new StringBuilder();
            builder.append("The following warnings occurred:\n");
            builder.append(String.valueOf(noEgoCoords));
            builder.append(" egos without coordinates\n");
            builder.append(String.valueOf(noAlterCoords));
            builder.append(" alters without coordinates\n");
            builder.append(String.valueOf(noEgoAge));
            builder.append(" egos without age\n");
            builder.append(String.valueOf(noAlterAge));
            builder.append(" alters without age\n");
            builder.append(String.valueOf(doubleEdges));
            builder.append(" double edges\n");
            builder.append(String.valueOf(noEgoSex));
            builder.append(" egos without sex\n");
            builder.append(String.valueOf(noAlterSex));
            builder.append(" alters without sex\n");
            return builder.toString();
        }
    }

    /**
     * Builds the graph from a fixed set of input files on the author's machine.
     * <p>
     * No ego tables are passed, and writing the resulting graph to disk is currently commented out.
     *
     * @param args ignored
     * @throws IOException if building the graph fails with an I/O error
     */
    public static void main(String[] args) throws IOException {
        final String rawDir = "/Users/jillenberger/Work/socialnets/data/ivt2009/11-2011/raw/";

        List<String> alterFiles = new ArrayList<String>();
        alterFiles.add(rawDir + "alters1.txt");
        alterFiles.add(rawDir + "alters2.txt");

        // Intentionally left empty: the ego table inputs are disabled.
        List<String> egoFiles = new ArrayList<String>();

        List<String> dumpFiles = new ArrayList<String>();
        dumpFiles.add(rawDir + "snowball.csv");

        GraphBuilder graphBuilder = new GraphBuilder();
        SampledGraphProjection<SocialSparseGraph, SocialSparseVertex, SocialSparseEdge> projection = graphBuilder
                .buildGraph(alterFiles, egoFiles, dumpFiles);

        // The writer is set up, but the actual export is disabled:
        SampledGraphProjMLWriter exporter = new SampledGraphProjMLWriter(new SocialSparseGraphMLWriter());
        //      exporter.write(projection, "/Users/jillenberger/Work/socialnets/data/ivt2009/11-2011/graph/graph.graphml");
        //      exporter.write(projection, "/Users/jillenberger/Work/socialnets/data/ivt2009/09-2010/graph/sociogram/graph.graphml");
    }
}