List of usage examples for the org.apache.hadoop.io.FloatWritable constructor FloatWritable(float)
public FloatWritable(float value)
From source file:org.apache.giraph.examples.SimpleShortestPathsVertexTest.java
License:Apache License
/**
 * Test the behavior when a new, but not shorter, path to a vertex has been
 * found: the vertex keeps its current value, halts, and sends no messages.
 */
@Test
public void testOnNoShorterPathFound() throws Exception {
    SimpleShortestPathsVertex vertex = new SimpleShortestPathsVertex();
    MockUtils.MockedEnvironment<LongWritable, DoubleWritable, FloatWritable, DoubleWritable> mockedEnv =
            MockUtils.prepareVertex(vertex, 1L, new LongWritable(7L), new DoubleWritable(0.5), false);
    Mockito.when(SOURCE_ID.get(mockedEnv.getConfiguration())).thenReturn(2L);

    // Two outgoing edges; both candidate paths (2 + 2.5, 1.5 + 0.5) are >= current value 0.5.
    vertex.addEdge(EdgeFactory.create(new LongWritable(10L), new FloatWritable(2.5f)));
    vertex.addEdge(EdgeFactory.create(new LongWritable(20L), new FloatWritable(0.5f)));

    List<DoubleWritable> incoming = Lists.newArrayList(new DoubleWritable(2), new DoubleWritable(1.5));
    vertex.compute(incoming);

    assertTrue(vertex.isHalted());
    assertEquals(0.5d, vertex.getValue().get(), 0d);
    mockedEnv.verifyNoMessageSent();
}
From source file:org.apache.giraph.examples.SimpleShortestPathVertexTest.java
License:Apache License
/**
 * Test the behavior when a shorter path to a vertex has been found: the
 * vertex adopts the shorter distance, halts, and propagates updated
 * distances to its neighbors.
 */
public void testOnShorterPathFound() throws Exception {
    SimpleShortestPathsVertex vertex = new SimpleShortestPathsVertex();
    vertex.addEdge(new LongWritable(10L), new FloatWritable(2.5f));
    vertex.addEdge(new LongWritable(20L), new FloatWritable(0.5f));
    MockUtils.MockedEnvironment<LongWritable, DoubleWritable, FloatWritable, DoubleWritable> env = MockUtils
            .prepareVertex(vertex, 1L, new LongWritable(7L), new DoubleWritable(Double.MAX_VALUE), false);
    Mockito.when(env.getConfiguration().getLong(SimpleShortestPathsVertex.SOURCE_ID,
            SimpleShortestPathsVertex.SOURCE_ID_DEFAULT)).thenReturn(2L);

    // Incoming candidate distances: 2 and 1.5; min (1.5) beats Double.MAX_VALUE.
    vertex.compute(Lists.newArrayList(new DoubleWritable(2), new DoubleWritable(1.5)).iterator());

    assertTrue(vertex.isHalted());
    // Fix: assertEquals on doubles requires an explicit delta — the two-arg
    // overload is deprecated/ambiguous in JUnit 4 (boxes to Object).
    assertEquals(1.5d, vertex.getVertexValue().get(), 0d);
    // New distance (1.5) plus edge weights: 1.5 + 2.5 = 4 and 1.5 + 0.5 = 2.
    env.verifyMessageSent(new LongWritable(10L), new DoubleWritable(4));
    env.verifyMessageSent(new LongWritable(20L), new DoubleWritable(2));
}
From source file:org.apache.giraph.examples.SimpleShortestPathVertexTest.java
License:Apache License
/** test the behavior when a new, but not shorter path to a vertex has been found */ public void testOnNoShorterPathFound() throws Exception { SimpleShortestPathsVertex vertex = new SimpleShortestPathsVertex(); vertex.addEdge(new LongWritable(10L), new FloatWritable(2.5f)); vertex.addEdge(new LongWritable(20L), new FloatWritable(0.5f)); MockUtils.MockedEnvironment<LongWritable, DoubleWritable, FloatWritable, DoubleWritable> env = MockUtils .prepareVertex(vertex, 1L, new LongWritable(7L), new DoubleWritable(0.5), false); Mockito.when(env.getConfiguration().getLong(SimpleShortestPathsVertex.SOURCE_ID, SimpleShortestPathsVertex.SOURCE_ID_DEFAULT)).thenReturn(2L); vertex.compute(Lists.newArrayList(new DoubleWritable(2), new DoubleWritable(1.5)).iterator()); assertTrue(vertex.isHalted());//from w w w .j a va 2s. c o m assertEquals(0.5, vertex.getVertexValue().get()); env.verifyNoMessageSent(); }
From source file:org.apache.giraph.examples.TopicVertexR.java
License:Apache License
/**
 * Tweet-similarity computation over a bipartite tweet/vocab graph, in three supersteps:
 * superstep 0: each vocab vertex (value == 0) broadcasts its neighbor (tweet) IDs;
 * superstep 1: each tweet vertex (value == 1) turns edge TFs into TF-IDF and sends its
 *              feature vector to every tweet sharing at least one vocab;
 * superstep 2: each vertex computes cosine similarity to those 2-hop tweet neighbors,
 *              adds an edge for similarities >= epson, and stores the variance as value.
 */
@Override
public void compute(Iterable<TopicVocabMessage> messages) {
    if (getSuperstep() == 0) {
        // Each vocab will broadcast its neighbor IDs.
        if (getValue().equals(new DoubleWritable(0))) { // 0 marks a vocab vertex
            int edgesNum = getNumEdges();
            int[] neighbors = new int[edgesNum];
            // Slot 0 carries the neighbor count (self is excluded below).
            neighbors[0] = edgesNum - 1;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                long vertexid = edge.getTargetVertexId().get();
                int index = 1;
                // Rebuild the neighbor list per target, skipping the target itself.
                for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                    long neighbourid = edge2.getTargetVertexId().get();
                    if (vertexid == neighbourid)
                        continue;
                    neighbors[index++] = (int) neighbourid;
                }
                sendMessage(edge.getTargetVertexId(),
                        new TopicVocabMessage((int) getId().get(), neighbors));
            }
        }
    } else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { // 1 marks a tweet vertex
            HashMap<Integer, int[]> vocabNeighbors = new HashMap<Integer, int[]>();
            // Tweets that share at least one vocab with this tweet.
            HashSet<Long> neighborTweets = new HashSet<Long>();
            for (TopicVocabMessage message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                // NOTE(review): overwrites the count slot of the received array with
                // this tweet's own id — presumably intentional so the set below
                // includes self (filtered out again when sending); confirm.
                tweetId[0] = (int) getId().get();
                vocabNeighbors.put(vocabId, tweetId);
                for (int i = 0; i < tweetId.length; i++)
                    neighborTweets.add(new Long(tweetId[i]));
            }
            // Replace each edge's TF with TF-IDF in place.
            for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
                // NOTE(review): if TWEET_SIZE is an int this is integer division — confirm.
                float idf = (TWEET_SIZE) / vocabNeighbors.get((int) edge.getTargetVertexId().get()).length;
                float tf = edge.getValue().get();
                float tfIDF = tf * idf;
                edge.setValue(new FloatWritable(tfIDF));
            }
            // Send the TF-IDF feature vector to all tweets that share at least
            // one vocab with the current tweet (neighborTweets).
            int edgesNum = getNumEdges();
            float[] mytfidf = new float[edgesNum + 1];
            int[] neighbors = new int[edgesNum + 1];
            mytfidf[0] = edgesNum; // slot 0 carries the vector length
            neighbors[0] = edgesNum;
            int index = 1;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                mytfidf[index] = edge.getValue().get();
                neighbors[index++] = (int) edge.getTargetVertexId().get();
            }
            for (Long tweet : neighborTweets) {
                if (tweet.equals(getId().get()))
                    continue; // do not message ourselves
                // Message content: current tweet id, vocab IDs, TF-IDF values.
                sendMessage(new LongWritable(tweet),
                        new TopicVocabMessage((int) getId().get(), neighbors, mytfidf));
            }
        }
        setValue(new DoubleWritable(0.0));
    } else if (getSuperstep() == 2) {
        // NOTE(review): the tweet-only guard (value == 1) is commented out in the
        // original — this branch runs for every vertex.
        HashMap<Integer, HashMap<Integer, Float>> neighborsTFIDF = new HashMap<Integer, HashMap<Integer, Float>>();
        for (TopicVocabMessage message : messages) {
            int tweet = message.getSourceId();
            int[] vocabId = message.getNeighborId();
            float[] tfidf = message.getTFIDF();
            HashMap<Integer, Float> currentTweetTFIDF = new HashMap<Integer, Float>();
            neighborsTFIDF.put(tweet, currentTweetTFIDF);
            int size = vocabId[0]; // slot 0 holds the entry count
            for (int i = 1; i <= size; i++)
                currentTweetTFIDF.put(vocabId[i], tfidf[i]);
        }
        // Cosine similarity against every tweet sharing at least one vocab.
        float[] sims = new float[neighborsTFIDF.size()];
        int simInd = 0; // index into the similarities array
        // Norm of the current tweet's TF-IDF vector.
        float norm = 0f;
        for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
            norm += edge.getValue().get() * edge.getValue().get();
        }
        norm = (float) Math.sqrt(norm);
        // For each 2-hop neighbor (tweet, not vocab):
        for (Integer neighbor : neighborsTFIDF.keySet()) {
            float cosSim = 0.0f;
            // Norm of the neighbor tweet's vector.
            float norm2 = 0f;
            HashMap<Integer, Float> currentMap = neighborsTFIDF.get(neighbor);
            for (Integer vocabId : currentMap.keySet()) {
                norm2 += currentMap.get(vocabId) * currentMap.get(vocabId);
            }
            norm2 = (float) Math.sqrt(norm2);
            // Dot product over shared vocab dimensions (the numerator).
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                if (currentMap.containsKey((int) edge.getTargetVertexId().get())) {
                    cosSim += edge.getValue().get() * currentMap.get((int) edge.getTargetVertexId().get());
                }
            }
            sims[simInd++] = cosSim / (norm * norm2);
            if (sims[simInd - 1] >= epson) {
                // Similar enough: add an edge between the two tweets.
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(sims[simInd - 1])));
                ;
            }
        }
        // Store the variance of the similarities as the vertex value.
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
    }
    voteToHalt();
}
From source file:org.apache.giraph.examples.TopicVertexShared.java
License:Apache License
/**
 * Variant of the tweet-similarity computation that shares work via the vocab
 * vertices: in superstep 1 each tweet both computes its TF-IDF (updating edge
 * values in place via setEdgeValue) and caches its norm in the vertex value,
 * so superstep 2 only needs the incoming feature vectors (each message carries
 * the sender's norm in its last TF-IDF slot).
 */
@Override
public void compute(Iterable<TopicVocabMessage> messages) {
    if (getSuperstep() == 0) {
        // Each vocab will broadcast its neighbor IDs.
        if (getValue().equals(new DoubleWritable(0))) { // 0 marks a vocab vertex
            int edgesNum = getNumEdges();
            int[] neighbors = new int[edgesNum];
            neighbors[0] = edgesNum - 1; // slot 0: neighbor count (self excluded)
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                long vertexid = edge.getTargetVertexId().get();
                int index = 1;
                for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                    long neighbourid = edge2.getTargetVertexId().get();
                    if (vertexid == neighbourid)
                        continue;
                    neighbors[index++] = (int) neighbourid;
                }
                sendMessage(edge.getTargetVertexId(),
                        new TopicVocabMessage((int) getId().get(), neighbors));
            }
        }
    } else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { // 1 marks a tweet vertex
            setValue(new DoubleWritable(0.0));
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // Tweets that share at least one vocab with me -> their shared vocab ids.
            HashMap<Integer, ArrayList<Integer>> neighborTweets = new HashMap<Integer, ArrayList<Integer>>();
            float norm = 0f;
            for (TopicVocabMessage message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                // NOTE(review): if TWEET_SIZE is an int this is integer division — confirm.
                float idf = (TWEET_SIZE) / tweetId.length;
                LongWritable vocabLong = new LongWritable(new Long(vocabId));
                float tf = getEdgeValue(vocabLong).get();
                float tfIDF = tf * idf;
                // Replace the TF on the edge to this vocab with its TF-IDF.
                setEdgeValue(vocabLong, new FloatWritable(tfIDF));
                norm += tfIDF * tfIDF;
                for (int i = 1; i < tweetId.length; i++) {
                    ArrayList<Integer> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                    if (neighbourTweetsVocab == null) {
                        neighbourTweetsVocab = new ArrayList<Integer>();
                        neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                    }
                    neighbourTweetsVocab.add(vocabId);
                }
                tfidfs.put(vocabId, tfIDF);
            }
            norm = (float) Math.sqrt(norm);
            // Cache the norm in the vertex value for superstep 2.
            if (neighborTweets.size() != 0)
                setValue(new DoubleWritable(norm));
            // Send the TF-IDF feature vector (restricted to the shared vocab) to
            // every tweet that shares at least one vocab with the current tweet.
            for (Integer tweet : neighborTweets.keySet()) {
                Long tweetid = new Long(tweet);
                ArrayList<Integer> edgesNum = neighborTweets.get(tweet);
                // mytfidf has one extra slot: the sender's norm rides in the last entry.
                float[] mytfidf = new float[edgesNum.size() + 2];
                int[] neighbors = new int[edgesNum.size() + 1];
                mytfidf[0] = edgesNum.size() + 1;
                neighbors[0] = edgesNum.size();
                int index = 1;
                for (Integer vocabid : edgesNum) {
                    mytfidf[index] = tfidfs.get(vocabid);
                    neighbors[index++] = vocabid;
                }
                mytfidf[index] = norm;
                // Message content: current tweet id, shared vocab IDs, TF-IDFs + norm.
                sendMessage(new LongWritable(tweetid),
                        new TopicVocabMessage((int) getId().get(), neighbors, mytfidf));
            }
        }
    } else if (getSuperstep() == 2) {
        // NOTE(review): the tweet-only guard (value == 1) is commented out in the
        // original — this branch runs for every vertex.
        // Cosine similarity against every tweet sharing at least one vocab.
        ArrayList<Float> sims = new ArrayList<Float>();
        // Current tweet's norm, cached in the vertex value during superstep 1.
        float norm = new Float(getValue().get());
        for (TopicVocabMessage message : messages) {
            float cosSim = 0.0f;
            int tweet = message.getSourceId();
            int[] vocabId = message.getNeighborId();
            float[] tfidf = message.getTFIDF();
            HashMap<Integer, Float> currentTweetTFIDF = new HashMap<Integer, Float>();
            float norm2 = tfidf[tfidf.length - 1]; // sender's norm rides in the last slot
            int size = vocabId[0];
            for (int i = 1; i <= size; i++) {
                currentTweetTFIDF.put(vocabId[i], tfidf[i]);
            }
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                if (currentTweetTFIDF.containsKey((int) edge.getTargetVertexId().get())) {
                    cosSim += edge.getValue().get() * currentTweetTFIDF.get((int) edge.getTargetVertexId().get());
                }
            }
            float similarity = cosSim / (norm * norm2);
            sims.add(similarity);
            if (similarity >= epson) {
                // Similar enough: add an edge between the two tweets.
                addEdge(EdgeFactory.create(new LongWritable(tweet), new FloatWritable(similarity)));
                ;
            }
        }
        // Store the variance of the similarities as the vertex value.
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
    }
    voteToHalt();
}
From source file:org.apache.giraph.examples.TopicVertexVocabBased.java
License:Apache License
/**
 * Vocab-driven variant: in superstep 0 each vocab vertex computes the TF-IDF
 * for ALL of its tweet neighbors and broadcasts both id and TF-IDF arrays, so
 * tweets never touch their own edges. Superstep 1 accumulates per-vocab TF-IDFs
 * and unnormalized dot products (stored as edge values); superstep 2 divides by
 * the neighbor norms received back and stores the similarity variance.
 */
@Override
public void compute(Iterable<TopicVocabMessageCustome> messages) {
    if (getSuperstep() == 0) {
        // Each vocab will broadcast its neighbor IDs and their TF-IDFs.
        if (getValue().equals(new DoubleWritable(0))) { // 0 marks a vocab vertex
            int edgesNum = getNumEdges();
            int[] neighbors = new int[edgesNum + 1];
            neighbors[0] = edgesNum; // slot 0: count
            float[] tfidfs = new float[edgesNum + 1];
            tfidfs[0] = edgesNum;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                long vertexid = edge.getTargetVertexId().get();
                int index = 1;
                for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                    long neighbourid = edge2.getTargetVertexId().get();
                    float edgevalue = edge2.getValue().get();
                    // TF-IDF computed on the vocab side: tf * (TWEET_SIZE / degree).
                    float tfidf = (edgevalue * TWEET_SIZE) / edgesNum;
                    // NOTE(review): self-skip is intentionally disabled here, unlike
                    // the other variants — every neighbor (incl. the target) is sent.
                    tfidfs[index] = tfidf;
                    neighbors[index++] = (int) neighbourid;
                }
                sendMessage(edge.getTargetVertexId(),
                        new TopicVocabMessageCustome((int) getId().get(), neighbors, tfidfs));
            }
        }
    } else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { // 1 marks a tweet vertex
            setValue(new DoubleWritable(0.0));
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // neighbor tweet id -> (vocab id -> that tweet's TF-IDF for the vocab).
            HashMap<Integer, HashMap<Integer, Float>> neighborTweets = new HashMap<Integer, HashMap<Integer, Float>>();
            int nodeid = (int) getId().get();
            float norm = 0f;
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                float[] msgtfidfs = message.getTFIDF();
                for (int i = 1; i < tweetId.length; i++) {
                    if (nodeid == tweetId[i]) {
                        // Our own TF-IDF for this vocab: accumulate our norm.
                        norm += msgtfidfs[i] * msgtfidfs[i];
                        tfidfs.put(vocabId, msgtfidfs[i]);
                    } else {
                        HashMap<Integer, Float> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                        if (neighbourTweetsVocab == null) {
                            neighbourTweetsVocab = new HashMap<Integer, Float>();
                            neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                        }
                        neighbourTweetsVocab.put(vocabId, msgtfidfs[i]);
                    }
                }
            }
            norm = (float) Math.sqrt(norm);
            for (Integer neighbor : neighborTweets.keySet()) {
                float cosSim = 0.0f;
                HashMap<Integer, Float> currentTweetTFIDF = neighborTweets.get(neighbor);
                for (Integer mytfidf : tfidfs.keySet()) {
                    if (currentTweetTFIDF.containsKey(mytfidf)) {
                        cosSim += currentTweetTFIDF.get(mytfidf) * tfidfs.get(mytfidf);
                    }
                }
                // Partially normalized (by our own norm only); the neighbor's
                // norm is applied in superstep 2.
                cosSim = cosSim / norm;
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(cosSim)));
                // Send our norm back so the neighbor can finish the normalization.
                int[] sentNeighbors = new int[1];
                float[] senttfidf = new float[1];
                senttfidf[0] = norm;
                sendMessage(new LongWritable(new Long(neighbor)),
                        new TopicVocabMessageCustome((int) getId().get(), sentNeighbors, senttfidf));
            }
        }
    } else if (getSuperstep() == 2) {
        // NOTE(review): the tweet-only guard (value == 1) is commented out in the
        // original — this branch runs for every vertex.
        ArrayList<Float> sims = new ArrayList<Float>();
        // sender tweet id -> sender's norm.
        HashMap<Integer, Float> norms = new HashMap<Integer, Float>();
        for (TopicVocabMessageCustome message : messages) {
            int vocabId = message.getSourceId();
            float[] tfidfs = message.getTFIDF();
            norms.put(vocabId, tfidfs[0]);
        }
        for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
            int myid = (int) edge.getTargetVertexId().get();
            if (myid <= 0) // non-positive ids are vocab vertices
                continue;
            // Finish the cosine normalization with the neighbor's norm.
            float norm = norms.get(myid);
            float sim = edge.getValue().get();
            sims.add(sim / norm);
            edge.setValue(new FloatWritable(sim / norm));
        }
        // Store the variance of the similarities as the vertex value.
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
    }
    voteToHalt();
}
From source file:org.apache.giraph.examples.TopicVertexVocabBasedAggregate.java
License:Apache License
/**
 * Aggregating variant of the vocab-driven computation: identical flow to
 * TopicVertexVocabBased through superstep 1, but superstep 2 collects the set
 * of neighbors above the epson threshold and pushes (id, neighbors, variance)
 * into the VarianceAggregator instead of rewriting edge values. Vocab vertices
 * vote to halt after superstep 0.
 */
@Override
public void compute(Iterable<TopicVocabMessageCustome> messages) {
    if (getSuperstep() == 0) {
        // Each vocab will broadcast its neighbor IDs and their TF-IDFs.
        if (getValue().equals(new DoubleWritable(0))) { // 0 marks a vocab vertex
            int edgesNum = getNumEdges();
            int[] neighbors = new int[edgesNum + 1];
            neighbors[0] = edgesNum; // slot 0: count
            float[] tfidfs = new float[edgesNum + 1];
            tfidfs[0] = edgesNum;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                long vertexid = edge.getTargetVertexId().get();
                int index = 1;
                for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                    long neighbourid = edge2.getTargetVertexId().get();
                    float edgevalue = edge2.getValue().get();
                    // TF-IDF computed on the vocab side: tf * (TWEET_SIZE / degree).
                    float tfidf = (edgevalue * TWEET_SIZE) / edgesNum;
                    // NOTE(review): self-skip intentionally disabled (see VocabBased).
                    tfidfs[index] = tfidf;
                    neighbors[index++] = (int) neighbourid;
                }
                sendMessage(edge.getTargetVertexId(),
                        new TopicVocabMessageCustome((int) getId().get(), neighbors, tfidfs));
            }
            // Vocab vertices are done after broadcasting.
            voteToHalt();
        }
    } else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { // 1 marks a tweet vertex
            setValue(new DoubleWritable(0.0));
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // neighbor tweet id -> (vocab id -> that tweet's TF-IDF for the vocab).
            HashMap<Integer, HashMap<Integer, Float>> neighborTweets = new HashMap<Integer, HashMap<Integer, Float>>();
            int nodeid = (int) getId().get();
            float norm = 0f;
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                float[] msgtfidfs = message.getTFIDF();
                for (int i = 1; i < tweetId.length; i++) {
                    if (nodeid == tweetId[i]) {
                        norm += msgtfidfs[i] * msgtfidfs[i];
                        tfidfs.put(vocabId, msgtfidfs[i]);
                    } else {
                        HashMap<Integer, Float> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                        if (neighbourTweetsVocab == null) {
                            neighbourTweetsVocab = new HashMap<Integer, Float>();
                            neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                        }
                        neighbourTweetsVocab.put(vocabId, msgtfidfs[i]);
                    }
                }
            }
            norm = (float) Math.sqrt(norm);
            int nsize = neighborTweets.size(); // kept for the disabled edge threshold below
            for (Integer neighbor : neighborTweets.keySet()) {
                float cosSim = 0.0f;
                HashMap<Integer, Float> currentTweetTFIDF = neighborTweets.get(neighbor);
                for (Integer mytfidf : tfidfs.keySet()) {
                    if (currentTweetTFIDF.containsKey(mytfidf)) {
                        cosSim += currentTweetTFIDF.get(mytfidf) * tfidfs.get(mytfidf);
                    }
                }
                // Partially normalized; the neighbor's norm is applied in superstep 2.
                cosSim = cosSim / norm;
                //if(nsize >= edgesthreshold) {
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(cosSim)));
                //}
                // Send our norm back so the neighbor can finish the normalization.
                int[] sentNeighbors = new int[1];
                float[] senttfidf = new float[1];
                senttfidf[0] = norm;
                sendMessage(new LongWritable(new Long(neighbor)),
                        new TopicVocabMessageCustome((int) getId().get(), sentNeighbors, senttfidf));
            }
        }
    } else if (getSuperstep() == 2) {
        // NOTE(review): the tweet-only guard (value == 1) is commented out in the
        // original — this branch runs for every non-halted vertex.
        HashMap<Integer, Float> norms = new HashMap<Integer, Float>();
        for (TopicVocabMessageCustome message : messages) {
            int vocabId = message.getSourceId();
            float[] tfidfs = message.getTFIDF();
            norms.put(vocabId, tfidfs[0]);
        }
        //if(norms.size() >= edgesthreshold) {
        ArrayList<Float> sims = new ArrayList<Float>();
        HashSet<Integer> neighbors = new HashSet<Integer>();
        for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
            int myid = (int) edge.getTargetVertexId().get();
            if (myid <= 0) // non-positive ids are vocab vertices
                continue;
            float norm = norms.get(myid);
            float sim = edge.getValue().get();
            sim = sim / norm;
            sims.add(sim);
            if (sim >= epson)
                neighbors.add(myid);
        }
        // Store the variance and report (id, neighbor set, variance) to the aggregator.
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
        AggregateMessageCustome msg = new AggregateMessageCustome((int) getId().get(), neighbors, var);
        aggregate(VarianceAggregator.VAR_AGG, msg);
        // }
        voteToHalt();
        //}
    }
}
From source file:org.apache.giraph.examples.TopicVertexVocabBasedAggregateDynamic.java
License:Apache License
/**
 * Dynamic sliding-window variant: the three-phase vocab-driven computation is
 * repeated every BspServiceWorker.MEGA_STEP supersteps (phases selected by
 * superstep % MEGA_STEP). Phase 2 additionally shifts the tweet window —
 * removing edges of vertices whose ids fall in the retiring range — and
 * phase 3 loads the next batch of tweets from HDFS via loadNewTimeSLot.
 * Vertices are distinguished by id sign here (id <= 0 is vocab), not by value.
 */
@Override
public void compute(Iterable<TopicVocabMessageCustome> messages) {
    if (getSuperstep() % BspServiceWorker.MEGA_STEP == 0) {
        // Phase 0: each vocab broadcasts its neighbor IDs and their TF-IDFs.
        try {
            if (getId().get() <= 0) { // non-positive id marks a vocab vertex
                int edgesNum = getNumEdges();
                if (edgesNum == 0) {
                    // Orphaned vocab (all its tweets were retired): remove it.
                    voteToHalt();
                    removeVertexRequest(getId());
                }
                int[] neighbors = new int[edgesNum + 1];
                neighbors[0] = edgesNum; // slot 0: count
                float[] tfidfs = new float[edgesNum + 1];
                tfidfs[0] = edgesNum;
                for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                    long vertexid = edge.getTargetVertexId().get();
                    int index = 1;
                    for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                        long neighbourid = edge2.getTargetVertexId().get();
                        float edgevalue = edge2.getValue().get();
                        // TF-IDF computed on the vocab side: tf * (TWEET_SIZE / degree).
                        float tfidf = (edgevalue * TWEET_SIZE) / edgesNum;
                        tfidfs[index] = tfidf;
                        neighbors[index++] = (int) neighbourid;
                    }
                    sendMessage(edge.getTargetVertexId(),
                            new TopicVocabMessageCustome((int) getId().get(), neighbors, tfidfs));
                }
            }
        } catch (IOException ex) {
            // removeVertexRequest may throw; best-effort logging only.
            System.out.println(ex.getMessage());
        }
    } else if (getSuperstep() % BspServiceWorker.MEGA_STEP == 1) {
        // Phase 1: tweets accumulate their TF-IDFs and partial dot products.
        if (getId().get() > 0) { // positive id marks a tweet vertex
            setValue(new DoubleWritable(0.0));
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // neighbor tweet id -> (vocab id -> that tweet's TF-IDF for the vocab).
            HashMap<Integer, HashMap<Integer, Float>> neighborTweets = new HashMap<Integer, HashMap<Integer, Float>>();
            int nodeid = (int) getId().get();
            float norm = 0f;
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                float[] msgtfidfs = message.getTFIDF();
                for (int i = 1; i < tweetId.length; i++) {
                    if (nodeid == tweetId[i]) {
                        norm += msgtfidfs[i] * msgtfidfs[i];
                        tfidfs.put(vocabId, msgtfidfs[i]);
                    } else {
                        HashMap<Integer, Float> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                        if (neighbourTweetsVocab == null) {
                            neighbourTweetsVocab = new HashMap<Integer, Float>();
                            neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                        }
                        neighbourTweetsVocab.put(vocabId, msgtfidfs[i]);
                    }
                }
            }
            norm = (float) Math.sqrt(norm);
            int nsize = neighborTweets.size();
            for (Integer neighbor : neighborTweets.keySet()) {
                float cosSim = 0.0f;
                HashMap<Integer, Float> currentTweetTFIDF = neighborTweets.get(neighbor);
                for (Integer mytfidf : tfidfs.keySet()) {
                    if (currentTweetTFIDF.containsKey(mytfidf)) {
                        cosSim += currentTweetTFIDF.get(mytfidf) * tfidfs.get(mytfidf);
                    }
                }
                // Partially normalized; the neighbor's norm is applied in phase 2.
                cosSim = cosSim / norm;
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(cosSim)));
                // Send our norm back so the neighbor can finish the normalization.
                int[] sentNeighbors = new int[1];
                float[] senttfidf = new float[1];
                senttfidf[0] = norm;
                sendMessage(new LongWritable(new Long(neighbor)),
                        new TopicVocabMessageCustome((int) getId().get(), sentNeighbors, senttfidf));
            }
        }
    } else if (getSuperstep() % BspServiceWorker.MEGA_STEP == 2) {
        // Phase 2: finish similarities, aggregate, then shift the tweet window.
        if (getId().get() > 0) {
            HashMap<Integer, Float> norms = new HashMap<Integer, Float>();
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                float[] tfidfs = message.getTFIDF();
                norms.put(vocabId, tfidfs[0]);
            }
            ArrayList<Float> sims = new ArrayList<Float>();
            HashSet<Integer> neighbors = new HashSet<Integer>();
            for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
                int myid = (int) edge.getTargetVertexId().get();
                if (myid <= 0) // vocab vertex
                    continue;
                float norm = norms.get(myid);
                float sim = edge.getValue().get();
                sim = sim / norm;
                sims.add(sim);
                if (sim >= epson)
                    neighbors.add(myid);
            }
            // Report (id, neighbor set, variance) to the aggregator.
            float var = getVariance(sims);
            AggregateMessageCustome msg = new AggregateMessageCustome((int) getId().get(), neighbors, var);
            aggregate(VarianceAggregator.VAR_AGG, msg);
        }
        long numMegaSetps = getNumMegaSteps();
        if (numMegaSetps * BspServiceWorker.SHIFT_SIZE >= (BspServiceWorker.TOTAL_TWEETS - BspServiceWorker.WINDOW_SIZE)) {
            // Whole stream consumed: stop.
            voteToHalt();
        } else {
            // Window shift:
            // 1) vertices whose ids fall in the retiring range drop ALL edges
            //    (they will change identity when the next batch loads);
            // 2) other tweets drop edges to tweet vertices; vocab vertices drop
            //    edges into the retiring range.
            long stRange = getStRange(numMegaSetps);
            long endRange = getEndRange(stRange);
            if (getId().get() >= stRange && getId().get() <= endRange) {
                // Collect first, then remove — avoids mutating edges mid-iteration.
                HashSet<LongWritable> ids = new HashSet<LongWritable>();
                for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                    ids.add(edge.getTargetVertexId());
                }
                for (LongWritable id : ids) {
                    removeEdges(id);
                }
            } else {
                if (getId().get() > 0) {
                    // Tweet outside the retiring range: drop tweet-to-tweet edges.
                    HashSet<LongWritable> ids = new HashSet<LongWritable>();
                    for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                        long targetVertexId = edge.getTargetVertexId().get();
                        if (targetVertexId > 0) {
                            ids.add(edge.getTargetVertexId());
                        }
                    }
                    for (LongWritable id : ids) {
                        removeEdges(id);
                    }
                } else {
                    // Vocab vertex: drop edges into the retiring range.
                    HashSet<LongWritable> ids = new HashSet<LongWritable>();
                    for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                        long targetVertexId = edge.getTargetVertexId().get();
                        if (targetVertexId >= stRange && targetVertexId <= endRange) {
                            ids.add(edge.getTargetVertexId());
                        }
                    }
                    for (LongWritable id : ids) {
                        removeEdges(id);
                    }
                }
            }
        }
    } else if (getSuperstep() % BspServiceWorker.MEGA_STEP == 3) {
        // Phase 3: one vertex per worker (the one holding a hostname) loads the
        // next time slot's batch file from HDFS, then clears the hostname so
        // the load happens exactly once per mega-step.
        long numMegaSetps = getNumMegaSteps() + 1;
        String hostname = getHostName();
        if ((hostname != null) && (!hostname.equalsIgnoreCase(""))) {
            String fileName = "/user/exp/ahmed/50k_sliding_20k_dynamic/" + numMegaSetps + "-" + getHostName();
            setHostName("");
            System.out.println("Trying to read batch from: " + fileName);
            loadNewTimeSLot(fileName);
        }
    }
}
From source file:org.apache.giraph.examples.TopicVertexVocabBasedAggregateDynamic.java
License:Apache License
private void loadNewTimeSLot(String filePath) { try {/* w w w . jav a 2s .com*/ //Load new data Path ptRead = new Path(filePath); FileSystem fs = FileSystem.get(new Configuration()); FSDataInputStream reader = fs.open(ptRead); //System.out.println("In vertex " + getId().get() +". reader = " + reader + ", in superstep = " + getSuperstep()); String line = reader.readLine(); if (line.length() != 0) { String[] splits = line.split("\t"); for (String split : splits) { addVertexRequest(new LongWritable(Long.parseLong(split)), new DoubleWritable(0)); //new vocab vertex } } while ((line = reader.readLine()) != null) { //System.out.println(line); String[] splits = line.split("\t"); long tweetId = Long.parseLong(splits[0]); //addVertexRequest(new LongWritable(tweetId) , new DoubleWritable(0)); //new vocab vertex //For each tweet int ind = 1; int endInd = (splits.length - 1) / 2; for (int i = 0; i < endInd; i++) { //create vocab vertex and add undirected edge long vocabId = Long.parseLong(splits[ind++]); float tf = Float.parseFloat(splits[ind++]); //LongWritable vocabIndex = new LongWritable(vocabId); //Add edge from the tweet to the vocab FloatWritable edgeVal = new FloatWritable(tf); //System.out.println("In vertex " + getId().get() + ", adding an edge to " + vocabId + ", superstep " + getSuperstep() + " i = " + i + ", endInd = " + endInd); addEdgeRequest(new LongWritable(tweetId), EdgeFactory.create(new LongWritable(vocabId), edgeVal)); //Add edge from the vocab to the tweet addEdgeRequest(new LongWritable(vocabId), EdgeFactory.create(new LongWritable(tweetId), edgeVal)); } } reader.close(); } catch (Exception e) { System.out.println("TopicVertexVocabBasedAggregateDynamic, loading batch file, " + e.getMessage()); } }
From source file:org.apache.giraph.examples.TopicVertexVocabBasedOptimizedAggregate.java
License:Apache License
/**
 * Optimized-aggregator variant: identical flow to
 * TopicVertexVocabBasedAggregate (vocab broadcasts TF-IDFs in superstep 0 and
 * halts; tweets accumulate partial dot products in superstep 1; superstep 2
 * finishes the normalization), but reports into
 * VarianceAggregatorOptimized.VAR_AGG.
 */
@Override
public void compute(Iterable<TopicVocabMessageCustome> messages) {
    if (getSuperstep() == 0) {
        // Each vocab will broadcast its neighbor IDs and their TF-IDFs.
        if (getValue().equals(new DoubleWritable(0))) { // 0 marks a vocab vertex
            int edgesNum = getNumEdges();
            int[] neighbors = new int[edgesNum + 1];
            neighbors[0] = edgesNum; // slot 0: count
            float[] tfidfs = new float[edgesNum + 1];
            tfidfs[0] = edgesNum;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                long vertexid = edge.getTargetVertexId().get();
                int index = 1;
                for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                    long neighbourid = edge2.getTargetVertexId().get();
                    float edgevalue = edge2.getValue().get();
                    // TF-IDF computed on the vocab side: tf * (TWEET_SIZE / degree).
                    float tfidf = (edgevalue * TWEET_SIZE) / edgesNum;
                    // NOTE(review): self-skip intentionally disabled (see VocabBased).
                    tfidfs[index] = tfidf;
                    neighbors[index++] = (int) neighbourid;
                }
                sendMessage(edge.getTargetVertexId(),
                        new TopicVocabMessageCustome((int) getId().get(), neighbors, tfidfs));
            }
            // Vocab vertices are done after broadcasting.
            voteToHalt();
        }
    } else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { // 1 marks a tweet vertex
            setValue(new DoubleWritable(0.0));
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // neighbor tweet id -> (vocab id -> that tweet's TF-IDF for the vocab).
            HashMap<Integer, HashMap<Integer, Float>> neighborTweets = new HashMap<Integer, HashMap<Integer, Float>>();
            int nodeid = (int) getId().get();
            float norm = 0f;
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                float[] msgtfidfs = message.getTFIDF();
                for (int i = 1; i < tweetId.length; i++) {
                    if (nodeid == tweetId[i]) {
                        norm += msgtfidfs[i] * msgtfidfs[i];
                        tfidfs.put(vocabId, msgtfidfs[i]);
                    } else {
                        HashMap<Integer, Float> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                        if (neighbourTweetsVocab == null) {
                            neighbourTweetsVocab = new HashMap<Integer, Float>();
                            neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                        }
                        neighbourTweetsVocab.put(vocabId, msgtfidfs[i]);
                    }
                }
            }
            norm = (float) Math.sqrt(norm);
            int nsize = neighborTweets.size(); // kept for the disabled edge threshold below
            for (Integer neighbor : neighborTweets.keySet()) {
                float cosSim = 0.0f;
                HashMap<Integer, Float> currentTweetTFIDF = neighborTweets.get(neighbor);
                for (Integer mytfidf : tfidfs.keySet()) {
                    if (currentTweetTFIDF.containsKey(mytfidf)) {
                        cosSim += currentTweetTFIDF.get(mytfidf) * tfidfs.get(mytfidf);
                    }
                }
                // Partially normalized; the neighbor's norm is applied in superstep 2.
                cosSim = cosSim / norm;
                //if(nsize >= edgesthreshold) {
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(cosSim)));
                //}
                // Send our norm back so the neighbor can finish the normalization.
                int[] sentNeighbors = new int[1];
                float[] senttfidf = new float[1];
                senttfidf[0] = norm;
                sendMessage(new LongWritable(new Long(neighbor)),
                        new TopicVocabMessageCustome((int) getId().get(), sentNeighbors, senttfidf));
            }
        }
    } else if (getSuperstep() == 2) {
        // NOTE(review): the tweet-only guard (value == 1) is commented out in the
        // original — this branch runs for every non-halted vertex.
        HashMap<Integer, Float> norms = new HashMap<Integer, Float>();
        for (TopicVocabMessageCustome message : messages) {
            int vocabId = message.getSourceId();
            float[] tfidfs = message.getTFIDF();
            norms.put(vocabId, tfidfs[0]);
        }
        //if(norms.size() >= edgesthreshold) {
        ArrayList<Float> sims = new ArrayList<Float>();
        HashSet<Integer> neighbors = new HashSet<Integer>();
        for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
            int myid = (int) edge.getTargetVertexId().get();
            if (myid <= 0) // non-positive ids are vocab vertices
                continue;
            float norm = norms.get(myid);
            float sim = edge.getValue().get();
            sim = sim / norm;
            sims.add(sim);
            if (sim >= epson)
                neighbors.add(myid);
        }
        // Store the variance and report to the optimized aggregator.
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
        AggregateMessageCustome msg = new AggregateMessageCustome((int) getId().get(), neighbors, var);
        aggregate(VarianceAggregatorOptimized.VAR_AGG, msg);
        // }
        voteToHalt();
        //}
    }
}