Example usage for the org.apache.hadoop.io.FloatWritable constructor FloatWritable(float)

List of usage examples for the org.apache.hadoop.io.FloatWritable constructor FloatWritable(float)

Introduction

On this page you can find example usages of the org.apache.hadoop.io.FloatWritable constructor FloatWritable(float).

Prototype

public FloatWritable(float value) 

Source Link

Usage

From source file:org.apache.giraph.examples.SimpleShortestPathsVertexTest.java

License:Apache License

/**
 * Checks that a vertex keeps its stored distance, halts, and stays silent
 * when none of the incoming candidate distances improves on it.
 */
@Test
public void testOnNoShorterPathFound() throws Exception {
    SimpleShortestPathsVertex underTest = new SimpleShortestPathsVertex();

    // Vertex 7 is prepared with a stored distance of 0.5.
    MockUtils.MockedEnvironment<LongWritable, DoubleWritable, FloatWritable, DoubleWritable> mockedEnv =
            MockUtils.prepareVertex(underTest, 1L, new LongWritable(7L), new DoubleWritable(0.5), false);

    // The configured source id (2) differs from the id of the vertex under test (7).
    Mockito.when(SOURCE_ID.get(mockedEnv.getConfiguration())).thenReturn(2L);

    // Two outgoing edges: to vertex 10 with weight 2.5 and to vertex 20 with weight 0.5.
    underTest.addEdge(EdgeFactory.create(new LongWritable(10L), new FloatWritable(2.5f)));
    underTest.addEdge(EdgeFactory.create(new LongWritable(20L), new FloatWritable(0.5f)));

    // Both incoming distances (2 and 1.5) are larger than the stored 0.5.
    underTest.compute(Lists.newArrayList(new DoubleWritable(2), new DoubleWritable(1.5)));

    // The stored distance must be untouched and nothing may be propagated.
    assertTrue(underTest.isHalted());
    assertEquals(0.5d, underTest.getValue().get(), 0d);
    mockedEnv.verifyNoMessageSent();
}

From source file:org.apache.giraph.examples.SimpleShortestPathVertexTest.java

License:Apache License

/**
 * Tests the behavior when a shorter path to a vertex has been found.
 *
 * The vertex starts with a stored distance of {@code Double.MAX_VALUE} and
 * receives the candidates 2 and 1.5. It is expected to adopt 1.5, halt, and
 * send 1.5 plus the edge weight along each outgoing edge (4 to vertex 10,
 * 2 to vertex 20).
 */
public void testOnShorterPathFound() throws Exception {

    SimpleShortestPathsVertex vertex = new SimpleShortestPathsVertex();
    vertex.addEdge(new LongWritable(10L), new FloatWritable(2.5f));
    vertex.addEdge(new LongWritable(20L), new FloatWritable(0.5f));

    MockUtils.MockedEnvironment<LongWritable, DoubleWritable, FloatWritable, DoubleWritable> env = MockUtils
            .prepareVertex(vertex, 1L, new LongWritable(7L), new DoubleWritable(Double.MAX_VALUE), false);

    // The configured source id (2) differs from the id of the vertex under test (7).
    Mockito.when(env.getConfiguration().getLong(SimpleShortestPathsVertex.SOURCE_ID,
            SimpleShortestPathsVertex.SOURCE_ID_DEFAULT)).thenReturn(2L);

    vertex.compute(Lists.newArrayList(new DoubleWritable(2), new DoubleWritable(1.5)).iterator());

    assertTrue(vertex.isHalted());
    // Explicit delta: the two-argument double overload is either unavailable
    // (falls back to boxed Double.equals) or deprecated and always failing,
    // depending on the JUnit flavor in use.
    assertEquals(1.5, vertex.getVertexValue().get(), 0d);

    env.verifyMessageSent(new LongWritable(10L), new DoubleWritable(4));
    env.verifyMessageSent(new LongWritable(20L), new DoubleWritable(2));
}

From source file:org.apache.giraph.examples.SimpleShortestPathVertexTest.java

License:Apache License

/**
 * Tests the behavior when a new, but not shorter path to a vertex has been found.
 *
 * The vertex starts with a stored distance of 0.5 and receives the candidates
 * 2 and 1.5. It is expected to keep 0.5, halt, and send no messages.
 */
public void testOnNoShorterPathFound() throws Exception {

    SimpleShortestPathsVertex vertex = new SimpleShortestPathsVertex();
    vertex.addEdge(new LongWritable(10L), new FloatWritable(2.5f));
    vertex.addEdge(new LongWritable(20L), new FloatWritable(0.5f));

    MockUtils.MockedEnvironment<LongWritable, DoubleWritable, FloatWritable, DoubleWritable> env = MockUtils
            .prepareVertex(vertex, 1L, new LongWritable(7L), new DoubleWritable(0.5), false);

    // The configured source id (2) differs from the id of the vertex under test (7).
    Mockito.when(env.getConfiguration().getLong(SimpleShortestPathsVertex.SOURCE_ID,
            SimpleShortestPathsVertex.SOURCE_ID_DEFAULT)).thenReturn(2L);

    vertex.compute(Lists.newArrayList(new DoubleWritable(2), new DoubleWritable(1.5)).iterator());

    assertTrue(vertex.isHalted());
    // Explicit delta: the two-argument double overload is either unavailable
    // (falls back to boxed Double.equals) or deprecated and always failing,
    // depending on the JUnit flavor in use.
    assertEquals(0.5, vertex.getVertexValue().get(), 0d);

    env.verifyNoMessageSent();
}

From source file:org.apache.giraph.examples.TopicVertexR.java

License:Apache License

/**
 * Runs one superstep of a three-phase tweet similarity computation and then
 * votes to halt.
 * <ul>
 * <li>Superstep 0: every vertex whose value is 0 (vocab) sends each neighbor
 *     the ids of its other neighbors; slot 0 of the array holds their count.</li>
 * <li>Superstep 1: every vertex whose value is 1 (tweet) rescales its edge
 *     values to tf * idf and sends its edge targets and edge values to every
 *     tweet id it learned about in superstep 0. The vertex value is then
 *     reset to 0 for all vertices.</li>
 * <li>Superstep 2: the vertex computes the cosine similarity between its own
 *     edge values and each received vector, adds an edge for every similarity
 *     that reaches {@code epson}, and stores {@code getVariance(sims)} as its
 *     value.</li>
 * </ul>
 *
 * @param messages messages delivered to this vertex for the current superstep
 */
@Override
public void compute(Iterable<TopicVocabMessage> messages) {

    if (getSuperstep() == 0) { //Each vocab will broadcast its neighbor IDs
        if (getValue().equals(new DoubleWritable(0))) { //0 for vocab
            int edgesNum = getNumEdges();
            // A vocab without edges has nobody to notify. The guard also keeps the
            // write to slot 0 below from indexing into a zero-length array.
            if (edgesNum > 0) {
                int[] neighbors = new int[edgesNum];
                neighbors[0] = edgesNum - 1; // slot 0 carries the number of ids that follow
                for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                    long vertexid = edge.getTargetVertexId().get();
                    int index = 1;
                    // Every neighbor except the recipient itself.
                    for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                        long neighbourid = edge2.getTargetVertexId().get();
                        if (vertexid == neighbourid)
                            continue;
                        neighbors[index++] = (int) neighbourid;
                    }
                    sendMessage(edge.getTargetVertexId(), new TopicVocabMessage((int) getId().get(), neighbors));
                }
            }
        }
    }

    else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { //1 for tweet
            HashMap<Integer, int[]> vocabNeighbors = new HashMap<Integer, int[]>();
            HashSet<Long> neighborTweets = new HashSet<Long>(); //The tweets that share at least one vocab with me
            for (TopicVocabMessage message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                // Slot 0 held the count; replace it with this tweet's id so the array
                // lists every tweet attached to the vocab.
                tweetId[0] = (int) getId().get();
                vocabNeighbors.put(vocabId, tweetId);
                for (int i = 0; i < tweetId.length; i++)
                    neighborTweets.add(Long.valueOf(tweetId[i]));
            }
            for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
                // The cast forces floating-point division; without it the quotient would be
                // truncated if TWEET_SIZE is declared as an integral type (its declaration
                // is outside this snippet).
                float idf = (float) TWEET_SIZE / vocabNeighbors.get((int) edge.getTargetVertexId().get()).length;
                float tf = edge.getValue().get();
                edge.setValue(new FloatWritable(tf * idf));
            }
            //Send the TF-IDF (the feature vector) to all the tweets that share at least one vocab with the current tweet (neighborTweets)
            int edgesNum = getNumEdges();
            float[] mytfidf = new float[edgesNum + 1];
            int[] neighbors = new int[edgesNum + 1];
            // Slot 0 of both arrays carries the number of entries that follow.
            mytfidf[0] = edgesNum;
            neighbors[0] = edgesNum;
            int index = 1;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                mytfidf[index] = edge.getValue().get();
                neighbors[index++] = (int) edge.getTargetVertexId().get();
            }
            for (Long tweet : neighborTweets) {
                if (tweet.equals(getId().get()))
                    continue; // never message ourselves
                sendMessage(new LongWritable(tweet),
                        new TopicVocabMessage((int) getId().get(), neighbors, mytfidf));
                //message content: currentTweet, vocab ID, TF-IDF. Sent to tweet
            }
        }
        setValue(new DoubleWritable(0.0));
    } else if (getSuperstep() == 2) {
        // Index the received feature vectors by sender: sender id -> (vocab id -> TF-IDF).
        HashMap<Integer, HashMap<Integer, Float>> neighborsTFIDF = new HashMap<Integer, HashMap<Integer, Float>>();
        for (TopicVocabMessage message : messages) {
            int tweet = message.getSourceId();
            int[] vocabId = message.getNeighborId();
            float[] tfidf = message.getTFIDF();
            HashMap<Integer, Float> currentTweetTFIDF = new HashMap<Integer, Float>();
            neighborsTFIDF.put(tweet, currentTweetTFIDF);
            int size = vocabId[0];
            for (int i = 1; i <= size; i++)
                currentTweetTFIDF.put(vocabId[i], tfidf[i]);
        }
        //Calculate similarity with other tweets that share at least one vocab with the current tweet
        float[] sims = new float[neighborsTFIDF.size()];
        int simInd = 0; //Index of the similarities array
        //Calculate the norm of the current tweet
        float norm = 0f;
        for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
            float weight = edge.getValue().get();
            norm += weight * weight;
        }
        norm = (float) Math.sqrt(norm);
        //For each 2-hop neighbor (tweet not vocab)
        for (Integer neighbor : neighborsTFIDF.keySet()) {
            HashMap<Integer, Float> currentMap = neighborsTFIDF.get(neighbor);
            //Calc the norm of the second tweet
            float norm2 = 0f;
            for (Float weight : currentMap.values()) {
                norm2 += weight * weight;
            }
            norm2 = (float) Math.sqrt(norm2);
            //Calculate the numerator: dot product over the vocab ids both tweets have
            float cosSim = 0.0f;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                Float theirs = currentMap.get((int) edge.getTargetVertexId().get());
                if (theirs != null) {
                    cosSim += edge.getValue().get() * theirs;
                }
            }
            float similarity = cosSim / (norm * norm2);
            sims[simInd++] = similarity;
            if (similarity >= epson) { // add edge between the two tweets
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(similarity)));
            }
        }
        //Calculate the variance
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
    }
    voteToHalt();

}

From source file:org.apache.giraph.examples.TopicVertexShared.java

License:Apache License

/**
 * Runs one superstep of a three-phase tweet similarity computation and then
 * votes to halt.
 * <ul>
 * <li>Superstep 0: every vertex whose value is 0 (vocab) sends each neighbor
 *     the ids of its other neighbors; slot 0 of the array holds their count.</li>
 * <li>Superstep 1: every vertex whose value is 1 (tweet) rescales the edge to
 *     each sending vocab to tf * idf, accumulates its own vector norm, and
 *     sends every tweet it shares a vocab with the TF-IDF values of the shared
 *     vocabs, with the norm appended as the last array element. The vertex
 *     value becomes the norm, or 0 when there is no such tweet.</li>
 * <li>Superstep 2: the vertex computes the cosine similarity against each
 *     received vector, adds an edge for every similarity that reaches
 *     {@code epson}, and stores {@code getVariance(sims)} as its value.</li>
 * </ul>
 *
 * @param messages messages delivered to this vertex for the current superstep
 */
@Override
public void compute(Iterable<TopicVocabMessage> messages) {

    if (getSuperstep() == 0) { //Each vocab will broadcast its neighbor IDs
        if (getValue().equals(new DoubleWritable(0))) { //0 for vocab
            int edgesNum = getNumEdges();
            // A vocab without edges has nobody to notify. The guard also keeps the
            // write to slot 0 below from indexing into a zero-length array.
            if (edgesNum > 0) {
                int[] neighbors = new int[edgesNum];
                neighbors[0] = edgesNum - 1; // slot 0 carries the number of ids that follow
                for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                    long vertexid = edge.getTargetVertexId().get();
                    int index = 1;
                    // Every neighbor except the recipient itself.
                    for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                        long neighbourid = edge2.getTargetVertexId().get();
                        if (vertexid == neighbourid)
                            continue;
                        neighbors[index++] = (int) neighbourid;
                    }
                    sendMessage(edge.getTargetVertexId(), new TopicVocabMessage((int) getId().get(), neighbors));
                }
            }
        }
    }

    else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { //1 for tweet
            setValue(new DoubleWritable(0.0));
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // Other tweet id -> vocab ids shared with that tweet.
            HashMap<Integer, ArrayList<Integer>> neighborTweets = new HashMap<Integer, ArrayList<Integer>>(); //The tweets that share at least one vocab with me
            float norm = 0f;
            for (TopicVocabMessage message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                // The cast forces floating-point division; without it the quotient would be
                // truncated if TWEET_SIZE is declared as an integral type (its declaration
                // is outside this snippet).
                float idf = (float) TWEET_SIZE / tweetId.length;
                LongWritable vocabLong = new LongWritable(vocabId);
                float tf = getEdgeValue(vocabLong).get();
                float tfIDF = tf * idf;
                setEdgeValue(vocabLong, new FloatWritable(tfIDF));
                norm += tfIDF * tfIDF;
                // Slot 0 is the count, so the ids start at index 1.
                for (int i = 1; i < tweetId.length; i++) {
                    ArrayList<Integer> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                    if (neighbourTweetsVocab == null) {
                        neighbourTweetsVocab = new ArrayList<Integer>();
                        neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                    }
                    neighbourTweetsVocab.add(vocabId);
                }
                tfidfs.put(vocabId, tfIDF);
            }
            norm = (float) Math.sqrt(norm);
            if (neighborTweets.size() != 0)
                setValue(new DoubleWritable(norm));
            //Send the TF-IDF (the feature vector) to all the tweets that share at least one vocab with the current tweet (neighborTweets)
            for (Integer tweet : neighborTweets.keySet()) {
                ArrayList<Integer> sharedVocabs = neighborTweets.get(tweet);
                // mytfidf: [count + 1, tfidf..., norm]; neighbors: [count, vocab id...]
                float[] mytfidf = new float[sharedVocabs.size() + 2];
                int[] neighbors = new int[sharedVocabs.size() + 1];
                mytfidf[0] = sharedVocabs.size() + 1;
                neighbors[0] = sharedVocabs.size();
                int index = 1;
                for (Integer vocabid : sharedVocabs) {
                    mytfidf[index] = tfidfs.get(vocabid);
                    neighbors[index++] = vocabid;
                }
                mytfidf[index] = norm;
                sendMessage(new LongWritable(tweet),
                        new TopicVocabMessage((int) getId().get(), neighbors, mytfidf));
                //message content: currentTweet, vocab ID, TF-IDF. Sent to tweet
            }
        }

    } else if (getSuperstep() == 2) {
        //Calculate similarity with other tweets that share at least one vocab with the current tweet
        ArrayList<Float> sims = new ArrayList<Float>();
        // The vertex value was set in superstep 1; it is read back here as this vertex's norm.
        float norm = (float) getValue().get();
        for (TopicVocabMessage message : messages) {
            int tweet = message.getSourceId();
            int[] vocabId = message.getNeighborId();
            float[] tfidf = message.getTFIDF();
            // The sender appended its own norm as the last element.
            float norm2 = tfidf[tfidf.length - 1];
            HashMap<Integer, Float> currentTweetTFIDF = new HashMap<Integer, Float>();
            int size = vocabId[0];
            for (int i = 1; i <= size; i++) {
                currentTweetTFIDF.put(vocabId[i], tfidf[i]);
            }
            // Dot product over the vocab ids present on both sides.
            float cosSim = 0.0f;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                Float theirs = currentTweetTFIDF.get((int) edge.getTargetVertexId().get());
                if (theirs != null) {
                    cosSim += edge.getValue().get() * theirs;
                }
            }
            float similarity = cosSim / (norm * norm2);
            sims.add(similarity);
            if (similarity >= epson) { // add edge between the two tweets
                addEdge(EdgeFactory.create(new LongWritable(tweet), new FloatWritable(similarity)));
            }
        }
        //Calculate the variance
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
    }

    voteToHalt();
}

From source file:org.apache.giraph.examples.TopicVertexVocabBased.java

License:Apache License

/**
 * Runs one superstep of a three-phase, vocab-driven tweet similarity
 * computation and then votes to halt.
 * <ul>
 * <li>Superstep 0: every vertex whose value is 0 (vocab) sends each neighbor
 *     the full list of its neighbor ids together with a weight per neighbor,
 *     {@code edgeValue * TWEET_SIZE / edgeCount}; slot 0 of both arrays holds
 *     the edge count.</li>
 * <li>Superstep 1: every vertex whose value is 1 (tweet) separates its own
 *     weights from those of other tweets, computes its norm, adds an edge to
 *     every other tweet holding the dot product divided by its own norm, and
 *     sends that tweet its norm.</li>
 * <li>Superstep 2: every edge with a positive target id is divided by the
 *     norm received from that target, and {@code getVariance} of the resulting
 *     values becomes the vertex value.</li>
 * </ul>
 *
 * @param messages messages delivered to this vertex for the current superstep
 */
@Override
public void compute(Iterable<TopicVocabMessageCustome> messages) {

    if (getSuperstep() == 0) { //Each vocab will broadcast its neighbor IDs
        if (getValue().equals(new DoubleWritable(0))) { //0 for vocab
            int edgesNum = getNumEdges();
            int[] neighbors = new int[edgesNum + 1];
            neighbors[0] = edgesNum; // slot 0 carries the number of entries that follow
            float[] tfidfs = new float[edgesNum + 1];
            tfidfs[0] = edgesNum;
            // The payload is the same for every recipient, so it is built once
            // instead of being rebuilt inside the send loop.
            int index = 1;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                tfidfs[index] = (edge.getValue().get() * TWEET_SIZE) / edgesNum;
                neighbors[index++] = (int) edge.getTargetVertexId().get();
            }
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                sendMessage(edge.getTargetVertexId(),
                        new TopicVocabMessageCustome((int) getId().get(), neighbors, tfidfs));
            }
        }
    }

    else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { //1 for tweet
            setValue(new DoubleWritable(0.0));
            // Vocab id -> this tweet's own weight for that vocab.
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // Other tweet id -> (vocab id -> that tweet's weight); the tweets that share at least one vocab with me.
            HashMap<Integer, HashMap<Integer, Float>> neighborTweets = new HashMap<Integer, HashMap<Integer, Float>>();
            int nodeid = (int) getId().get();
            float norm = 0f;
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                float[] msgtfidfs = message.getTFIDF();
                // Slot 0 is the count, so the entries start at index 1.
                for (int i = 1; i < tweetId.length; i++) {
                    if (nodeid == tweetId[i]) {
                        norm += msgtfidfs[i] * msgtfidfs[i];
                        tfidfs.put(vocabId, msgtfidfs[i]);
                    } else {
                        HashMap<Integer, Float> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                        if (neighbourTweetsVocab == null) {
                            neighbourTweetsVocab = new HashMap<Integer, Float>();
                            neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                        }
                        neighbourTweetsVocab.put(vocabId, msgtfidfs[i]);
                    }
                }
            }
            norm = (float) Math.sqrt(norm);
            for (Integer neighbor : neighborTweets.keySet()) {
                HashMap<Integer, Float> currentTweetTFIDF = neighborTweets.get(neighbor);
                // Dot product over the vocab ids present on both sides.
                float cosSim = 0.0f;
                for (Integer vocab : tfidfs.keySet()) {
                    Float theirs = currentTweetTFIDF.get(vocab);
                    if (theirs != null) {
                        cosSim += theirs * tfidfs.get(vocab);
                    }
                }
                // Only our own norm is known here; the division by the neighbor's
                // norm happens in superstep 2 once that norm has arrived.
                cosSim = cosSim / norm;
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(cosSim)));
                int[] sentNeighbors = new int[1];
                float[] senttfidf = new float[1];
                senttfidf[0] = norm;
                sendMessage(new LongWritable(neighbor),
                        new TopicVocabMessageCustome((int) getId().get(), sentNeighbors, senttfidf));
            }
        }

    } else if (getSuperstep() == 2) {
        //Calculate similarity with other tweets that share at least one vocab with the current tweet
        ArrayList<Float> sims = new ArrayList<Float>();
        // Sender id -> norm sent by that vertex in superstep 1.
        HashMap<Integer, Float> norms = new HashMap<Integer, Float>();
        for (TopicVocabMessageCustome message : messages) {
            norms.put(message.getSourceId(), message.getTFIDF()[0]);
        }
        for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
            int myid = (int) edge.getTargetVertexId().get();
            if (myid <= 0) // vocab vertex
                continue;
            float norm = norms.get(myid);
            float normalized = edge.getValue().get() / norm;
            sims.add(normalized);
            edge.setValue(new FloatWritable(normalized));
        }
        //Calculate the variance
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
    }

    voteToHalt();
}

From source file:org.apache.giraph.examples.TopicVertexVocabBasedAggregate.java

License:Apache License

/**
 * Runs one superstep of a three-phase, vocab-driven tweet similarity
 * computation whose result is published through an aggregator.
 * <ul>
 * <li>Superstep 0: every vertex whose value is 0 (vocab) sends each neighbor
 *     the full list of its neighbor ids together with a weight per neighbor,
 *     {@code edgeValue * TWEET_SIZE / edgeCount}, and votes to halt.</li>
 * <li>Superstep 1: every vertex whose value is 1 (tweet) separates its own
 *     weights from those of other tweets, computes its norm, adds an edge to
 *     every other tweet holding the dot product divided by its own norm, and
 *     sends that tweet its norm.</li>
 * <li>Superstep 2: each edge with a positive target id is divided by the norm
 *     received from that target (the edge itself is left unchanged); targets
 *     whose result reaches {@code epson} are collected, the variance is stored
 *     as the vertex value and aggregated under
 *     {@code VarianceAggregator.VAR_AGG}, and the vertex votes to halt.</li>
 * </ul>
 *
 * @param messages messages delivered to this vertex for the current superstep
 */
@Override
public void compute(Iterable<TopicVocabMessageCustome> messages) {

    if (getSuperstep() == 0) { //Each vocab will broadcast its neighbor IDs
        if (getValue().equals(new DoubleWritable(0))) { //0 for vocab
            int edgesNum = getNumEdges();
            int[] neighbors = new int[edgesNum + 1];
            neighbors[0] = edgesNum; // slot 0 carries the number of entries that follow
            float[] tfidfs = new float[edgesNum + 1];
            tfidfs[0] = edgesNum;
            // The payload is the same for every recipient, so it is built once
            // instead of being rebuilt inside the send loop.
            int index = 1;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                tfidfs[index] = (edge.getValue().get() * TWEET_SIZE) / edgesNum;
                neighbors[index++] = (int) edge.getTargetVertexId().get();
            }
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                sendMessage(edge.getTargetVertexId(),
                        new TopicVocabMessageCustome((int) getId().get(), neighbors, tfidfs));
            }
            voteToHalt();
        }
    }

    else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { //1 for tweet
            setValue(new DoubleWritable(0.0));
            // Vocab id -> this tweet's own weight for that vocab.
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // Other tweet id -> (vocab id -> that tweet's weight); the tweets that share at least one vocab with me.
            HashMap<Integer, HashMap<Integer, Float>> neighborTweets = new HashMap<Integer, HashMap<Integer, Float>>();
            int nodeid = (int) getId().get();
            float norm = 0f;
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                float[] msgtfidfs = message.getTFIDF();
                // Slot 0 is the count, so the entries start at index 1.
                for (int i = 1; i < tweetId.length; i++) {
                    if (nodeid == tweetId[i]) {
                        norm += msgtfidfs[i] * msgtfidfs[i];
                        tfidfs.put(vocabId, msgtfidfs[i]);
                    } else {
                        HashMap<Integer, Float> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                        if (neighbourTweetsVocab == null) {
                            neighbourTweetsVocab = new HashMap<Integer, Float>();
                            neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                        }
                        neighbourTweetsVocab.put(vocabId, msgtfidfs[i]);
                    }
                }
            }
            norm = (float) Math.sqrt(norm);
            for (Integer neighbor : neighborTweets.keySet()) {
                HashMap<Integer, Float> currentTweetTFIDF = neighborTweets.get(neighbor);
                // Dot product over the vocab ids present on both sides.
                float cosSim = 0.0f;
                for (Integer vocab : tfidfs.keySet()) {
                    Float theirs = currentTweetTFIDF.get(vocab);
                    if (theirs != null) {
                        cosSim += theirs * tfidfs.get(vocab);
                    }
                }
                // Only our own norm is known here; the division by the neighbor's
                // norm happens in superstep 2 once that norm has arrived.
                cosSim = cosSim / norm;
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(cosSim)));
                int[] sentNeighbors = new int[1];
                float[] senttfidf = new float[1];
                senttfidf[0] = norm;
                sendMessage(new LongWritable(neighbor),
                        new TopicVocabMessageCustome((int) getId().get(), sentNeighbors, senttfidf));
            }
        }

    } else if (getSuperstep() == 2) {
        //Calculate similarity with other tweets that share at least one vocab with the current tweet
        // Sender id -> norm sent by that vertex in superstep 1.
        HashMap<Integer, Float> norms = new HashMap<Integer, Float>();
        for (TopicVocabMessageCustome message : messages) {
            norms.put(message.getSourceId(), message.getTFIDF()[0]);
        }
        ArrayList<Float> sims = new ArrayList<Float>();
        // Targets whose normalized similarity reaches the epson threshold.
        HashSet<Integer> neighbors = new HashSet<Integer>();
        for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
            int myid = (int) edge.getTargetVertexId().get();
            if (myid <= 0) // vocab vertex
                continue;
            float norm = norms.get(myid);
            float sim = edge.getValue().get() / norm;
            sims.add(sim);
            if (sim >= epson)
                neighbors.add(myid);
        }
        //Calculate the variance
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
        AggregateMessageCustome msg = new AggregateMessageCustome((int) getId().get(), neighbors, var);
        aggregate(VarianceAggregator.VAR_AGG, msg);
        voteToHalt();
    }

}

From source file:org.apache.giraph.examples.TopicVertexVocabBasedAggregateDynamic.java

License:Apache License

/**
 * Runs one superstep of a repeating four-phase cycle; the phase is
 * {@code getSuperstep() % BspServiceWorker.MEGA_STEP}. Vertices with an id of
 * zero or less are treated as vocab vertices, positive ids as tweets.
 * <ul>
 * <li>Phase 0: each vocab vertex sends every neighbor the full list of its
 *     neighbor ids together with a weight per neighbor,
 *     {@code edgeValue * TWEET_SIZE / edgeCount}. A vocab vertex without edges
 *     votes to halt and requests its own removal.</li>
 * <li>Phase 1: each tweet separates its own weights from those of other
 *     tweets, computes its norm, adds an edge to every other tweet holding the
 *     dot product divided by its own norm, and sends that tweet its norm.</li>
 * <li>Phase 2: each tweet divides its tweet-to-tweet edge values by the
 *     received norms, aggregates the variance and the targets reaching
 *     {@code epson} under {@code VarianceAggregator.VAR_AGG}, then either votes
 *     to halt (when the shift/window bound is reached) or removes edges ahead
 *     of the next batch.</li>
 * <li>Phase 3: a vertex with a non-empty host name loads the next batch file
 *     and clears the host name.</li>
 * </ul>
 *
 * @param messages messages delivered to this vertex for the current superstep
 */
@Override
public void compute(Iterable<TopicVocabMessageCustome> messages) {
    if (getSuperstep() % BspServiceWorker.MEGA_STEP == 0) { //Each vocab will broadcast its neighbor IDs
        try {
            if (getId().get() <= 0) { //for vocab
                int edgesNum = getNumEdges();
                if (edgesNum == 0) {
                    voteToHalt();
                    removeVertexRequest(getId());
                }
                int[] neighbors = new int[edgesNum + 1];
                neighbors[0] = edgesNum; // slot 0 carries the number of entries that follow
                float[] tfidfs = new float[edgesNum + 1];
                tfidfs[0] = edgesNum;
                // The payload is the same for every recipient, so it is built once
                // instead of being rebuilt inside the send loop.
                int index = 1;
                for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                    tfidfs[index] = (edge.getValue().get() * TWEET_SIZE) / edgesNum;
                    neighbors[index++] = (int) edge.getTargetVertexId().get();
                }
                for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                    sendMessage(edge.getTargetVertexId(),
                            new TopicVocabMessageCustome((int) getId().get(), neighbors, tfidfs));
                }
            }
        } catch (IOException ex) {
            System.out.println(ex.getMessage());
        }
    }

    else if (getSuperstep() % BspServiceWorker.MEGA_STEP == 1) {
        if (getId().get() > 0) { // for tweet
            setValue(new DoubleWritable(0.0));
            // Vocab id -> this tweet's own weight for that vocab.
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // Other tweet id -> (vocab id -> that tweet's weight); the tweets that share at least one vocab with me.
            HashMap<Integer, HashMap<Integer, Float>> neighborTweets = new HashMap<Integer, HashMap<Integer, Float>>();
            int nodeid = (int) getId().get();
            float norm = 0f;
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                float[] msgtfidfs = message.getTFIDF();
                // Slot 0 is the count, so the entries start at index 1.
                for (int i = 1; i < tweetId.length; i++) {
                    if (nodeid == tweetId[i]) {
                        norm += msgtfidfs[i] * msgtfidfs[i];
                        tfidfs.put(vocabId, msgtfidfs[i]);
                    } else {
                        HashMap<Integer, Float> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                        if (neighbourTweetsVocab == null) {
                            neighbourTweetsVocab = new HashMap<Integer, Float>();
                            neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                        }
                        neighbourTweetsVocab.put(vocabId, msgtfidfs[i]);
                    }
                }
            }
            norm = (float) Math.sqrt(norm);
            for (Integer neighbor : neighborTweets.keySet()) {
                HashMap<Integer, Float> currentTweetTFIDF = neighborTweets.get(neighbor);
                // Dot product over the vocab ids present on both sides.
                float cosSim = 0.0f;
                for (Integer vocab : tfidfs.keySet()) {
                    Float theirs = currentTweetTFIDF.get(vocab);
                    if (theirs != null) {
                        cosSim += theirs * tfidfs.get(vocab);
                    }
                }
                // Only our own norm is known here; the division by the neighbor's
                // norm happens in the next phase once that norm has arrived.
                cosSim = cosSim / norm;
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(cosSim)));
                int[] sentNeighbors = new int[1];
                float[] senttfidf = new float[1];
                senttfidf[0] = norm;

                sendMessage(new LongWritable(neighbor),
                        new TopicVocabMessageCustome((int) getId().get(), sentNeighbors, senttfidf));
            }

        }

    } else if (getSuperstep() % BspServiceWorker.MEGA_STEP == 2) {
        if (getId().get() > 0) {
            // Sender id -> norm sent by that vertex in the previous phase.
            HashMap<Integer, Float> norms = new HashMap<Integer, Float>();
            for (TopicVocabMessageCustome message : messages) {
                norms.put(message.getSourceId(), message.getTFIDF()[0]);
            }
            ArrayList<Float> sims = new ArrayList<Float>();
            // Targets whose normalized similarity reaches the epson threshold.
            HashSet<Integer> neighbors = new HashSet<Integer>();
            for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
                int myid = (int) edge.getTargetVertexId().get();
                if (myid <= 0) // vocab vertex
                    continue;
                float norm = norms.get(myid);
                float sim = edge.getValue().get() / norm;
                sims.add(sim);
                if (sim >= epson)
                    neighbors.add(myid);
            }

            //Calculate the variance
            float var = getVariance(sims);
            AggregateMessageCustome msg = new AggregateMessageCustome((int) getId().get(), neighbors, var);
            aggregate(VarianceAggregator.VAR_AGG, msg);
        }
        long numMegaSetps = getNumMegaSteps();
        if (numMegaSetps * BspServiceWorker.SHIFT_SIZE >= (BspServiceWorker.TOTAL_TWEETS
                - BspServiceWorker.WINDOW_SIZE)) {
            voteToHalt();
        } else {
            /*
               1-Removing vertices that will change their identities
               2-Remove edges to the vertices that will change their identities
            */
            long stRange = getStRange(numMegaSetps);
            long endRange = getEndRange(stRange);
            long selfId = getId().get();
            boolean selfInRange = selfId >= stRange && selfId <= endRange;

            // Targets are collected first and removed afterwards so the edge list is not
            // modified while it is being iterated. Each id is copied into a fresh
            // LongWritable because the objects handed out by the edge iterator may be
            // reused between iterations.
            HashSet<LongWritable> ids = new HashSet<LongWritable>();
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                long targetVertexId = edge.getTargetVertexId().get();
                boolean drop;
                if (selfInRange) {
                    // This vertex is inside the range being replaced: drop every edge.
                    drop = true;
                } else if (selfId > 0) {
                    // A tweet outside the range: drop all tweet-to-tweet edges.
                    drop = targetVertexId > 0;
                } else {
                    // A vocab outside the range: drop edges into the range.
                    drop = targetVertexId >= stRange && targetVertexId <= endRange;
                }
                if (drop) {
                    ids.add(new LongWritable(targetVertexId));
                }
            }
            for (LongWritable id : ids) {
                removeEdges(id);
            }
        }
    } else if (getSuperstep() % BspServiceWorker.MEGA_STEP == 3) {
        long numMegaSetps = getNumMegaSteps() + 1;
        String hostname = getHostName();
        if ((hostname != null) && (!hostname.equalsIgnoreCase(""))) {
            String fileName = "/user/exp/ahmed/50k_sliding_20k_dynamic/" + numMegaSetps + "-" + getHostName();
            // Clear the host name before loading the batch file.
            setHostName("");
            System.out.println("Trying to read batch from: " + fileName);
            loadNewTimeSLot(fileName);
        }
    }

}

From source file:org.apache.giraph.examples.TopicVertexVocabBasedAggregateDynamic.java

License:Apache License

/**
 * Loads one batch of new graph data from a file and turns it into graph
 * mutation requests.
 *
 * <p>Layout of the file, as parsed here:
 * <ul>
 * <li>first line: tab-separated vertex ids; a vertex with value 0 is
 * requested for each of them;</li>
 * <li>every following non-empty line: a tweet id followed by tab-separated
 * (vocab id, term frequency) pairs; for each pair an edge carrying the term
 * frequency is requested in both directions (tweet to vocab and vocab to
 * tweet). A trailing field without a partner is ignored.</li>
 * </ul>
 *
 * <p>The file is opened through {@code FileSystem.get(new Configuration())}.
 * Any failure is reported on standard output and not rethrown; requests
 * issued before the failure are not undone. The input stream is always
 * closed, whether or not loading succeeded.
 *
 * @param filePath path of the batch file to read
 */
private void loadNewTimeSLot(String filePath) {
    FSDataInputStream reader = null;
    try {
        //Load new data
        Path ptRead = new Path(filePath);
        FileSystem fs = FileSystem.get(new Configuration());
        reader = fs.open(ptRead);

        // First line lists the new vocab vertices. readLine() returns null on
        // an empty file, so guard against that before touching the line.
        String line = reader.readLine();
        if (line != null && line.length() != 0) {
            String[] splits = line.split("\t");
            for (String split : splits) {
                addVertexRequest(new LongWritable(Long.parseLong(split)), new DoubleWritable(0)); //new vocab vertex
            }
        }

        // Remaining lines: one tweet per line.
        while ((line = reader.readLine()) != null) {
            if (line.length() == 0) {
                // A blank line carries no tweet; skip it instead of failing
                // on Long.parseLong("") and dropping the rest of the batch.
                continue;
            }
            String[] splits = line.split("\t");
            long tweetId = Long.parseLong(splits[0]);
            // Fields after the tweet id come in (vocabId, tf) pairs.
            int ind = 1;
            int endInd = (splits.length - 1) / 2;
            for (int i = 0; i < endInd; i++) {
                long vocabId = Long.parseLong(splits[ind++]);
                float tf = Float.parseFloat(splits[ind++]);
                FloatWritable edgeVal = new FloatWritable(tf);
                //Add edge from the tweet to the vocab
                addEdgeRequest(new LongWritable(tweetId),
                        EdgeFactory.create(new LongWritable(vocabId), edgeVal));
                //Add edge from the vocab to the tweet
                addEdgeRequest(new LongWritable(vocabId),
                        EdgeFactory.create(new LongWritable(tweetId), edgeVal));
            }
        }
    } catch (Exception e) {
        System.out.println("TopicVertexVocabBasedAggregateDynamic, loading batch file, " + e.getMessage());
    } finally {
        // Close on every path so a parse or request failure does not leak the stream.
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception closeFailure) {
                System.out.println("TopicVertexVocabBasedAggregateDynamic, loading batch file, "
                        + closeFailure.getMessage());
            }
        }
    }
}

From source file:org.apache.giraph.examples.TopicVertexVocabBasedOptimizedAggregate.java

License:Apache License

/**
 * Runs one superstep of the tweet-similarity computation. Only supersteps
 * 0, 1 and 2 do any work.
 *
 * <ul>
 * <li><b>Superstep 0</b> - a vertex whose value equals 0 (a vocab vertex)
 * builds two parallel arrays over its edges: target ids and weights
 * {@code edgeValue * TWEET_SIZE / numEdges}. Slot 0 of both arrays holds
 * the edge count. The same pair of arrays is sent to every neighbor and
 * the vertex votes to halt.</li>
 * <li><b>Superstep 1</b> - a vertex whose value equals 1 (a tweet vertex)
 * resets its value to 0.0, splits the received weights into its own
 * (entries whose id matches this vertex) and those of other tweets,
 * and computes the Euclidean norm of its own weights. For every other
 * tweet seen it adds an edge whose value is the dot product over shared
 * vocab ids divided by this vertex's norm, and sends its norm to that
 * tweet in slot 0 of the message's weight array.</li>
 * <li><b>Superstep 2</b> - received norms are indexed by sender id. For each
 * edge with a positive target id the edge value is divided by the
 * target's norm; targets whose result is at least {@code epson} are
 * collected. The variance of all results becomes the vertex value and
 * is aggregated together with the collected ids under
 * {@code VarianceAggregatorOptimized.VAR_AGG}; then the vertex votes to
 * halt. Edge values themselves are not modified.</li>
 * </ul>
 *
 * <p>Vertex ids are narrowed to {@code int} throughout because the message
 * types carry {@code int} ids.
 *
 * @param messages messages delivered to this vertex for the current superstep
 */
@Override
public void compute(Iterable<TopicVocabMessageCustome> messages) {

    if (getSuperstep() == 0) { //Each vocab will broadcast its neighbor IDs
        if (getValue().equals(new DoubleWritable(0))) { //0 for vocab
            int edgesNum = getNumEdges();
            int[] neighbors = new int[edgesNum + 1];
            neighbors[0] = edgesNum;
            float[] tfidfs = new float[edgesNum + 1];
            tfidfs[0] = edgesNum;

            // The payload is the same for every recipient, so fill it once
            // instead of rebuilding it inside the send loop.
            int index = 1;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                float edgevalue = edge.getValue().get();
                float tfidf = (edgevalue * TWEET_SIZE) / edgesNum;
                tfidfs[index] = tfidf;
                neighbors[index++] = (int) edge.getTargetVertexId().get();
            }

            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                sendMessage(edge.getTargetVertexId(),
                        new TopicVocabMessageCustome((int) getId().get(), neighbors, tfidfs));
            }
            voteToHalt();
        }
    }

    else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { //1 for tweet
            setValue(new DoubleWritable(0.0));
            // vocab id -> this tweet's weight for that vocab
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // other tweet id -> (vocab id -> that tweet's weight); only tweets
            // sharing at least one vocab with this one appear here
            HashMap<Integer, HashMap<Integer, Float>> neighborTweets = new HashMap<Integer, HashMap<Integer, Float>>();
            int nodeid = (int) getId().get();
            float norm = 0f;
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                float[] msgtfidfs = message.getTFIDF();
                // Slot 0 holds the count, real entries start at 1.
                for (int i = 1; i < tweetId.length; i++) {
                    if (nodeid == tweetId[i]) {
                        norm += msgtfidfs[i] * msgtfidfs[i];
                        tfidfs.put(vocabId, msgtfidfs[i]);
                    } else {
                        HashMap<Integer, Float> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                        if (neighbourTweetsVocab == null) {
                            neighbourTweetsVocab = new HashMap<Integer, Float>();
                            neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                        }
                        neighbourTweetsVocab.put(vocabId, msgtfidfs[i]);
                    }
                }
            }
            norm = (float) Math.sqrt(norm);
            for (Integer neighbor : neighborTweets.keySet()) {
                float cosSim = 0.0f;
                HashMap<Integer, Float> currentTweetTFIDF = neighborTweets.get(neighbor);
                for (Integer mytfidf : tfidfs.keySet()) {
                    // Values are never null in these maps, so a null result
                    // means the vocab is not shared with this neighbor.
                    Float neighborWeight = currentTweetTFIDF.get(mytfidf);
                    if (neighborWeight != null) {
                        cosSim += neighborWeight * tfidfs.get(mytfidf);
                    }
                }
                // Only this vertex's norm is applied here; the neighbor's norm
                // is divided out in superstep 2.
                cosSim = cosSim / norm;
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(cosSim)));

                // Ship this vertex's norm to the neighbor in slot 0.
                int[] sentNeighbors = new int[1];
                float[] senttfidf = new float[1];
                senttfidf[0] = norm;
                sendMessage(new LongWritable(neighbor.longValue()),
                        new TopicVocabMessageCustome(nodeid, sentNeighbors, senttfidf));
            }
        }

    } else if (getSuperstep() == 2) {
        // sender id -> norm sent by that vertex in superstep 1
        HashMap<Integer, Float> norms = new HashMap<Integer, Float>();
        for (TopicVocabMessageCustome message : messages) {
            int senderId = message.getSourceId();
            float[] tfidfs = message.getTFIDF();
            norms.put(senderId, tfidfs[0]);
        }

        ArrayList<Float> sims = new ArrayList<Float>();
        HashSet<Integer> neighbors = new HashSet<Integer>();
        for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
            int myid = (int) edge.getTargetVertexId().get();
            if (myid <= 0) { // vocab vertex
                continue;
            }
            // NOTE(review): this unboxing throws NullPointerException if no
            // norm message arrived from this edge's target - presumably every
            // positive-id neighbor sent one in superstep 1; confirm.
            float norm = norms.get(myid);
            float sim = edge.getValue().get();
            sim = sim / norm;
            sims.add(sim);
            if (sim >= epson) {
                neighbors.add(myid);
            }
        }
        //Calculate the variance
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
        AggregateMessageCustome msg = new AggregateMessageCustome((int) getId().get(), neighbors, var);
        aggregate(VarianceAggregatorOptimized.VAR_AGG, msg);
        voteToHalt();
    }

}