List of usage examples for the org.apache.hadoop.io.DoubleWritable constructor
public DoubleWritable(double value)
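Before the project examples below, a minimal sketch of the constructor in isolation: DoubleWritable wraps a primitive double so it can be serialized, compared, and reused as a Hadoop Writable. The get/set/compareTo calls are standard org.apache.hadoop.io.DoubleWritable API; the surrounding class and main method are illustrative only.

import org.apache.hadoop.io.DoubleWritable;

public class DoubleWritableBasics {
    public static void main(String[] args) {
        DoubleWritable dw = new DoubleWritable(0.5); // wrap a primitive double
        double raw = dw.get();                       // read it back
        dw.set(raw * 2);                             // Writables are mutable and reusable
        // compareTo/equals operate on the wrapped value
        System.out.println(dw.compareTo(new DoubleWritable(1.0))); // prints 0
    }
}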
From source file:org.apache.giraph.examples.SimpleShortestPathVertexTest.java
License:Apache License
/** Test the behavior when a new, but not shorter, path to a vertex has been found. */
public void testOnNoShorterPathFound() throws Exception {
    SimpleShortestPathsVertex vertex = new SimpleShortestPathsVertex();
    vertex.addEdge(new LongWritable(10L), new FloatWritable(2.5f));
    vertex.addEdge(new LongWritable(20L), new FloatWritable(0.5f));

    MockUtils.MockedEnvironment<LongWritable, DoubleWritable, FloatWritable, DoubleWritable> env =
            MockUtils.prepareVertex(vertex, 1L, new LongWritable(7L), new DoubleWritable(0.5), false);

    Mockito.when(env.getConfiguration().getLong(SimpleShortestPathsVertex.SOURCE_ID,
            SimpleShortestPathsVertex.SOURCE_ID_DEFAULT)).thenReturn(2L);

    vertex.compute(Lists.newArrayList(new DoubleWritable(2), new DoubleWritable(1.5)).iterator());

    assertTrue(vertex.isHalted());
    assertEquals(0.5, vertex.getVertexValue().get());
    env.verifyNoMessageSent();
}
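A side note on the assertion style: getVertexValue().get() unwraps to a primitive double, and JUnit 4's two-argument assertEquals(double, double) is deprecated in favor of a form with an explicit tolerance. A hedged variant of the same check (the 1e-9 delta is an arbitrary choice, not from the original test):

// Explicit tolerance for floating-point comparison
assertEquals(0.5, vertex.getVertexValue().get(), 1e-9);
// Or compare the wrappers directly; DoubleWritable implements equals() on the wrapped value
assertEquals(new DoubleWritable(0.5), vertex.getVertexValue());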
From source file:org.apache.giraph.examples.SimpleTolPageRankComputation.java
License:Apache License
@Override
public void compute(Vertex<LongWritable, DoubleWritable, NullWritable> vertex,
        Iterable<DoubleWritable> messages) throws IOException {
    // YH: We'll use a trick to match how GraphLab async performs
    // PageRank w/ error tolerance termination.
    //
    // Unlike GraphLab async, which can directly pull from neighbours,
    // we always need to send messages to keep neighbours up-to-date.
    // However, this also wakes up neighbours, which is not desirable.
    //
    // So we use two types of messages:
    // - update + signal => do more work to help me converge
    //   (equivalent to GraphLab's scatter/signal)
    // - update only => here's my final delta, I'm done
    //   (implicit in GraphLab's gather)
    //
    // Since messages (= vertex values) are always positive, we use
    // positive for update+signal and negative for update-only.
    //
    // NOTE: We follow GraphLab's alternative way of computing PageRank,
    // which is to not divide by |V|. To get the probability value at
    // each vertex, take its PageRank value and divide by |V|.
    double oldVal = vertex.getValue().get();
    boolean signalled = false;

    if (getLogicalSuperstep() == 0) {
        vertex.getValue().set(1.0);
        oldVal = 0.0; // so delta is > 0
        signalled = true;
    } else {
        double sum = 0;
        for (DoubleWritable message : messages) {
            if (message.get() > 0) {
                signalled = true;
            }
            sum += Math.abs(message.get());
        }
        vertex.getValue().set(0.15 + 0.85 * sum);
    }

    double delta = Math.abs(oldVal - vertex.getValue().get());
    boolean converged = delta <= MIN_TOL.get(getConf());

    // send messages only when signalled
    if (delta > 0 && signalled) {
        if (!converged) {
            // update+signal message (need more help)
            sendMessageToAllEdges(vertex,
                    new DoubleWritable(vertex.getValue().get() / vertex.getNumEdges()));
        } else {
            // update only (I'm done)
            sendMessageToAllEdges(vertex,
                    new DoubleWritable(-1.0 * vertex.getValue().get() / vertex.getNumEdges()));
        }
    }

    // always vote to halt
    vertex.voteToHalt();
}
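The sign-encoding trick above is independent of Giraph and easy to test in isolation: pack a boolean "signal" flag into the sign of a strictly positive payload, and recover both on the receiving side. A minimal sketch, assuming payloads are always non-zero as the comment requires (class and method names are illustrative, not from the original source):

import org.apache.hadoop.io.DoubleWritable;

final class SignalCodec {
    /** Encode a strictly positive payload; a negative sign means "update only, don't signal". */
    static DoubleWritable encode(double positivePayload, boolean signal) {
        return new DoubleWritable(signal ? positivePayload : -positivePayload);
    }

    static boolean isSignal(DoubleWritable msg) {
        return msg.get() > 0; // positive messages wake the receiver up
    }

    static double payload(DoubleWritable msg) {
        return Math.abs(msg.get()); // receivers always sum absolute values
    }
}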
From source file:org.apache.giraph.examples.SumAggregator.java
License:Apache License
public DoubleWritable getAggregatedValue() {
    return new DoubleWritable(sum);
}
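For context, a sketch of the kind of class this one-liner plausibly comes from: a sum aggregator keeps a running double and hands it out wrapped in a fresh DoubleWritable. The field and the methods other than getAggregatedValue() are assumptions modeled on Giraph's aggregator pattern, not the verbatim SumAggregator source.

public class SumAggregator {
    private double sum = 0.0; // assumed backing field

    public void aggregate(DoubleWritable value) {
        sum += value.get(); // fold each contributed value into the running sum
    }

    public DoubleWritable getAggregatedValue() {
        return new DoubleWritable(sum); // the line shown above
    }

    public void setAggregatedValue(DoubleWritable value) {
        sum = value.get();
    }
}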
From source file:org.apache.giraph.examples.TopicVertexR.java
License:Apache License
@Override
public void compute(Iterable<TopicVocabMessage> messages) {
    if (getSuperstep() == 0) { // Each vocab will broadcast its neighbor IDs
        if (getValue().equals(new DoubleWritable(0))) { // 0 for vocab
            int edgesNum = getNumEdges();
            int[] neighbors = new int[edgesNum];
            neighbors[0] = edgesNum - 1;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                long vertexid = edge.getTargetVertexId().get();
                int index = 1;
                for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                    long neighbourid = edge2.getTargetVertexId().get();
                    if (vertexid == neighbourid)
                        continue;
                    neighbors[index++] = (int) neighbourid;
                }
                sendMessage(edge.getTargetVertexId(),
                        new TopicVocabMessage((int) getId().get(), neighbors));
            }
        }
    } else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { // 1 for tweet
            HashMap<Integer, int[]> vocabNeighbors = new HashMap<Integer, int[]>();
            // The tweets that share at least one vocab with me
            HashSet<Long> neighborTweets = new HashSet<Long>();
            for (TopicVocabMessage message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                tweetId[0] = (int) getId().get();
                vocabNeighbors.put(vocabId, tweetId);
                for (int i = 0; i < tweetId.length; i++)
                    neighborTweets.add(new Long(tweetId[i]));
            }
            for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
                float idf = (TWEET_SIZE) / vocabNeighbors.get((int) edge.getTargetVertexId().get()).length;
                float tf = edge.getValue().get();
                float tfIDF = tf * idf;
                edge.setValue(new FloatWritable(tfIDF));
            }
            // Send the TF-IDF (the feature vector) to all the tweets that share
            // at least one vocab with the current tweet (neighborTweets)
            int edgesNum = getNumEdges();
            float[] mytfidf = new float[edgesNum + 1];
            int[] neighbors = new int[edgesNum + 1];
            mytfidf[0] = edgesNum;
            neighbors[0] = edgesNum;
            int index = 1;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                mytfidf[index] = edge.getValue().get();
                neighbors[index++] = (int) edge.getTargetVertexId().get();
            }
            for (Long tweet : neighborTweets) {
                if (tweet.equals(getId().get()))
                    continue;
                // message content: current tweet, vocab IDs, TF-IDF; sent to each neighbor tweet
                sendMessage(new LongWritable(tweet),
                        new TopicVocabMessage((int) getId().get(), neighbors, mytfidf));
            }
        }
        setValue(new DoubleWritable(0.0));
    } else if (getSuperstep() == 2) {
        //if (getValue().equals(new DoubleWritable(1))) { // 1 for tweet
        HashMap<Integer, HashMap<Integer, Float>> neighborsTFIDF =
                new HashMap<Integer, HashMap<Integer, Float>>();
        for (TopicVocabMessage message : messages) {
            int tweet = message.getSourceId();
            int[] vocabId = message.getNeighborId();
            float[] tfidf = message.getTFIDF();
            HashMap<Integer, Float> currentTweetTFIDF = new HashMap<Integer, Float>();
            neighborsTFIDF.put(tweet, currentTweetTFIDF);
            int size = vocabId[0];
            for (int i = 1; i <= size; i++)
                currentTweetTFIDF.put(vocabId[i], tfidf[i]);
        }
        // Calculate similarity with other tweets that share at least one vocab with the current tweet
        float[] sims = new float[neighborsTFIDF.size()];
        int simInd = 0; // Index into the similarities array
        // Calculate the norm of the current tweet
        float norm = 0f;
        for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
            norm += edge.getValue().get() * edge.getValue().get();
        }
        norm = (float) Math.sqrt(norm);
        // For each 2-hop neighbor (tweet, not vocab)
        for (Integer neighbor : neighborsTFIDF.keySet()) {
            float cosSim = 0.0f;
            // Calculate the norm of the second tweet
            float norm2 = 0f;
            HashMap<Integer, Float> currentMap = neighborsTFIDF.get(neighbor);
            for (Integer vocabId : currentMap.keySet()) {
                norm2 += currentMap.get(vocabId) * currentMap.get(vocabId);
            }
            norm2 = (float) Math.sqrt(norm2);
            // Calculate the numerator
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                if (currentMap.containsKey((int) edge.getTargetVertexId().get())) {
                    cosSim += edge.getValue().get() * currentMap.get((int) edge.getTargetVertexId().get());
                }
            }
            sims[simInd++] = cosSim / (norm * norm2);
            if (sims[simInd - 1] >= epson) {
                // add an edge between the two tweets
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(sims[simInd - 1])));
            }
        }
        // Calculate the variance
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
        //}
    }
    voteToHalt();
}
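The superstep-2 logic above boils down to cosine similarity between two sparse TF-IDF vectors keyed by vocab id. A standalone sketch of that kernel, assuming the same sparse-map representation the example builds (the class and method are illustrative only):

import java.util.Map;

final class CosineSim {
    /** cos(a, b) = dot(a, b) / (|a| * |b|) over sparse vectors keyed by vocab id. */
    static float cosine(Map<Integer, Float> a, Map<Integer, Float> b) {
        float dot = 0f, normA = 0f, normB = 0f;
        for (Map.Entry<Integer, Float> e : a.entrySet()) {
            normA += e.getValue() * e.getValue();
            Float other = b.get(e.getKey());
            if (other != null) {
                dot += e.getValue() * other; // only shared vocab terms contribute
            }
        }
        for (float v : b.values()) {
            normB += v * v;
        }
        return dot / (float) (Math.sqrt(normA) * Math.sqrt(normB));
    }
}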
From source file:org.apache.giraph.examples.TopicVertexShared.java
License:Apache License
@Override
public void compute(Iterable<TopicVocabMessage> messages) {
    if (getSuperstep() == 0) { // Each vocab will broadcast its neighbor IDs
        if (getValue().equals(new DoubleWritable(0))) { // 0 for vocab
            int edgesNum = getNumEdges();
            int[] neighbors = new int[edgesNum];
            neighbors[0] = edgesNum - 1;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                long vertexid = edge.getTargetVertexId().get();
                int index = 1;
                for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                    long neighbourid = edge2.getTargetVertexId().get();
                    if (vertexid == neighbourid)
                        continue;
                    neighbors[index++] = (int) neighbourid;
                }
                sendMessage(edge.getTargetVertexId(),
                        new TopicVocabMessage((int) getId().get(), neighbors));
            }
        }
    } else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { // 1 for tweet
            setValue(new DoubleWritable(0.0));
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // The tweets that share at least one vocab with me
            HashMap<Integer, ArrayList<Integer>> neighborTweets =
                    new HashMap<Integer, ArrayList<Integer>>();
            float norm = 0f;
            for (TopicVocabMessage message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                //tweetId[0] = (int) getId().get();
                float idf = (TWEET_SIZE) / tweetId.length;
                LongWritable vocabLong = new LongWritable(new Long(vocabId));
                float tf = getEdgeValue(vocabLong).get();
                float tfIDF = tf * idf;
                setEdgeValue(vocabLong, new FloatWritable(tfIDF));
                norm += tfIDF * tfIDF;
                for (int i = 1; i < tweetId.length; i++) {
                    ArrayList<Integer> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                    if (neighbourTweetsVocab == null) {
                        neighbourTweetsVocab = new ArrayList<Integer>();
                        neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                    }
                    neighbourTweetsVocab.add(vocabId);
                }
                tfidfs.put(vocabId, tfIDF);
            }
            norm = (float) Math.sqrt(norm);
            if (neighborTweets.size() != 0)
                setValue(new DoubleWritable(norm));
            // Send the TF-IDF (the feature vector) to all the tweets that share
            // at least one vocab with the current tweet (neighborTweets)
            for (Integer tweet : neighborTweets.keySet()) {
                Long tweetid = new Long(tweet);
                ArrayList<Integer> edgesNum = neighborTweets.get(tweet);
                float[] mytfidf = new float[edgesNum.size() + 2];
                int[] neighbors = new int[edgesNum.size() + 1];
                mytfidf[0] = edgesNum.size() + 1;
                neighbors[0] = edgesNum.size();
                int index = 1;
                for (Integer vocabid : edgesNum) {
                    mytfidf[index] = tfidfs.get(vocabid);
                    neighbors[index++] = vocabid;
                }
                mytfidf[index] = norm;
                // message content: current tweet, vocab IDs, TF-IDF; sent to each neighbor tweet
                sendMessage(new LongWritable(tweetid),
                        new TopicVocabMessage((int) getId().get(), neighbors, mytfidf));
            }
        }
    } else if (getSuperstep() == 2) {
        //if (getValue().equals(new DoubleWritable(1))) { // 1 for tweet
        // Calculate similarity with other tweets that share at least one vocab with the current tweet
        ArrayList<Float> sims = new ArrayList<Float>();
        // The norm of the current tweet (stored in the vertex value at superstep 1)
        float norm = new Float(getValue().get());
        for (TopicVocabMessage message : messages) {
            float cosSim = 0.0f;
            int tweet = message.getSourceId();
            int[] vocabId = message.getNeighborId();
            float[] tfidf = message.getTFIDF();
            HashMap<Integer, Float> currentTweetTFIDF = new HashMap<Integer, Float>();
            float norm2 = tfidf[tfidf.length - 1];
            int size = vocabId[0];
            for (int i = 1; i <= size; i++) {
                currentTweetTFIDF.put(vocabId[i], tfidf[i]);
            }
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                if (currentTweetTFIDF.containsKey((int) edge.getTargetVertexId().get())) {
                    cosSim += edge.getValue().get()
                            * currentTweetTFIDF.get((int) edge.getTargetVertexId().get());
                }
            }
            float similarity = cosSim / (norm * norm2);
            sims.add(similarity);
            if (similarity >= epson) {
                // add an edge between the two tweets
                addEdge(EdgeFactory.create(new LongWritable(tweet), new FloatWritable(similarity)));
            }
        }
        // Calculate the variance
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
        //}
    }
    voteToHalt();
}
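Several of these examples end by calling getVariance(...), which is not shown on this page (TopicVertexR passes a float[], the others an ArrayList<Float>, suggesting overloads). A plausible sketch of the list version, an assumption about its behavior rather than the original source:

import java.util.ArrayList;

static float getVariance(ArrayList<Float> sims) {
    if (sims.isEmpty()) {
        return 0f;
    }
    float mean = 0f;
    for (float s : sims) {
        mean += s;
    }
    mean /= sims.size();
    float var = 0f;
    for (float s : sims) {
        var += (s - mean) * (s - mean); // squared deviation from the mean
    }
    return var / sims.size(); // population variance
}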
From source file:org.apache.giraph.examples.TopicVertexVocabBased.java
License:Apache License
@Override
public void compute(Iterable<TopicVocabMessageCustome> messages) {
    if (getSuperstep() == 0) { // Each vocab will broadcast its neighbor IDs
        if (getValue().equals(new DoubleWritable(0))) { // 0 for vocab
            int edgesNum = getNumEdges();
            int[] neighbors = new int[edgesNum + 1];
            neighbors[0] = edgesNum;
            float[] tfidfs = new float[edgesNum + 1];
            tfidfs[0] = edgesNum;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                long vertexid = edge.getTargetVertexId().get();
                int index = 1;
                for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                    long neighbourid = edge2.getTargetVertexId().get();
                    float edgevalue = edge2.getValue().get();
                    float tfidf = (edgevalue * TWEET_SIZE) / edgesNum;
                    //if (vertexid == neighbourid)
                    //    continue;
                    tfidfs[index] = tfidf;
                    neighbors[index++] = (int) neighbourid;
                }
                sendMessage(edge.getTargetVertexId(),
                        new TopicVocabMessageCustome((int) getId().get(), neighbors, tfidfs));
            }
        }
    } else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { // 1 for tweet
            setValue(new DoubleWritable(0.0));
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // The tweets that share at least one vocab with the current tweet
            HashMap<Integer, HashMap<Integer, Float>> neighborTweets =
                    new HashMap<Integer, HashMap<Integer, Float>>();
            int nodeid = (int) getId().get();
            float norm = 0f;
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                float[] msgtfidfs = message.getTFIDF();
                for (int i = 1; i < tweetId.length; i++) {
                    if (nodeid == tweetId[i]) {
                        norm += msgtfidfs[i] * msgtfidfs[i];
                        tfidfs.put(vocabId, msgtfidfs[i]);
                    } else {
                        HashMap<Integer, Float> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                        if (neighbourTweetsVocab == null) {
                            neighbourTweetsVocab = new HashMap<Integer, Float>();
                            neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                        }
                        neighbourTweetsVocab.put(vocabId, msgtfidfs[i]);
                    }
                }
            }
            norm = (float) Math.sqrt(norm);
            for (Integer neighbor : neighborTweets.keySet()) {
                float cosSim = 0.0f;
                HashMap<Integer, Float> currentTweetTFIDF = neighborTweets.get(neighbor);
                for (Integer mytfidf : tfidfs.keySet()) {
                    if (currentTweetTFIDF.containsKey(mytfidf)) {
                        cosSim += currentTweetTFIDF.get(mytfidf) * tfidfs.get(mytfidf);
                    }
                }
                cosSim = cosSim / norm;
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(cosSim)));
                int[] sentNeighbors = new int[1];
                float[] senttfidf = new float[1];
                senttfidf[0] = norm;
                sendMessage(new LongWritable(new Long(neighbor)),
                        new TopicVocabMessageCustome((int) getId().get(), sentNeighbors, senttfidf));
            }
            //setValue(new DoubleWritable(newvar));
        }
    } else if (getSuperstep() == 2) {
        //if (getValue().equals(new DoubleWritable(1))) { // 1 for tweet
        // Calculate similarity with other tweets that share at least one vocab with the current tweet
        ArrayList<Float> sims = new ArrayList<Float>();
        HashMap<Integer, Float> norms = new HashMap<Integer, Float>();
        for (TopicVocabMessageCustome message : messages) {
            int vocabId = message.getSourceId();
            float[] tfidfs = message.getTFIDF();
            norms.put(vocabId, tfidfs[0]);
        }
        for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
            int myid = (int) edge.getTargetVertexId().get();
            if (myid <= 0) // vocab vertex
                continue;
            float norm = norms.get(myid);
            float sim = edge.getValue().get();
            sims.add(sim / norm);
            edge.setValue(new FloatWritable(sim / norm));
        }
        // Calculate the variance
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
        //}
    }
    voteToHalt();
}
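One hazard in the superstep-2 loop above (and in its sibling variants below): norms.get(myid) auto-unboxes to a primitive float and throws a NullPointerException if no norm message arrived for that neighbor, a possibility the dynamic variant's commented-out null check hints at. A hedged defensive pattern, not part of the original code:

// Skip edges whose norm never arrived instead of risking an unboxing NPE
Float normObj = norms.get(myid);
if (normObj == null) {
    continue;
}
float sim = edge.getValue().get() / normObj;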
From source file:org.apache.giraph.examples.TopicVertexVocabBasedAggregate.java
License:Apache License
@Override
public void compute(Iterable<TopicVocabMessageCustome> messages) {
    if (getSuperstep() == 0) { // Each vocab will broadcast its neighbor IDs
        if (getValue().equals(new DoubleWritable(0))) { // 0 for vocab
            int edgesNum = getNumEdges();
            int[] neighbors = new int[edgesNum + 1];
            neighbors[0] = edgesNum;
            float[] tfidfs = new float[edgesNum + 1];
            tfidfs[0] = edgesNum;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                long vertexid = edge.getTargetVertexId().get();
                int index = 1;
                for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                    long neighbourid = edge2.getTargetVertexId().get();
                    float edgevalue = edge2.getValue().get();
                    float tfidf = (edgevalue * TWEET_SIZE) / edgesNum;
                    //if (vertexid == neighbourid)
                    //    continue;
                    tfidfs[index] = tfidf;
                    neighbors[index++] = (int) neighbourid;
                }
                sendMessage(edge.getTargetVertexId(),
                        new TopicVocabMessageCustome((int) getId().get(), neighbors, tfidfs));
            }
            voteToHalt();
        }
    } else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { // 1 for tweet
            setValue(new DoubleWritable(0.0));
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // The tweets that share at least one vocab with the current tweet
            HashMap<Integer, HashMap<Integer, Float>> neighborTweets =
                    new HashMap<Integer, HashMap<Integer, Float>>();
            int nodeid = (int) getId().get();
            float norm = 0f;
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                float[] msgtfidfs = message.getTFIDF();
                for (int i = 1; i < tweetId.length; i++) {
                    if (nodeid == tweetId[i]) {
                        norm += msgtfidfs[i] * msgtfidfs[i];
                        tfidfs.put(vocabId, msgtfidfs[i]);
                    } else {
                        HashMap<Integer, Float> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                        if (neighbourTweetsVocab == null) {
                            neighbourTweetsVocab = new HashMap<Integer, Float>();
                            neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                        }
                        neighbourTweetsVocab.put(vocabId, msgtfidfs[i]);
                    }
                }
            }
            norm = (float) Math.sqrt(norm);
            int nsize = neighborTweets.size();
            for (Integer neighbor : neighborTweets.keySet()) {
                float cosSim = 0.0f;
                HashMap<Integer, Float> currentTweetTFIDF = neighborTweets.get(neighbor);
                for (Integer mytfidf : tfidfs.keySet()) {
                    if (currentTweetTFIDF.containsKey(mytfidf)) {
                        cosSim += currentTweetTFIDF.get(mytfidf) * tfidfs.get(mytfidf);
                    }
                }
                cosSim = cosSim / norm;
                //if (nsize >= edgesthreshold) {
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(cosSim)));
                //}
                int[] sentNeighbors = new int[1];
                float[] senttfidf = new float[1];
                senttfidf[0] = norm;
                sendMessage(new LongWritable(new Long(neighbor)),
                        new TopicVocabMessageCustome((int) getId().get(), sentNeighbors, senttfidf));
            }
            //setValue(new DoubleWritable(newvar));
        }
    } else if (getSuperstep() == 2) {
        //if (getValue().equals(new DoubleWritable(1))) { // 1 for tweet
        // Calculate similarity with other tweets that share at least one vocab with the current tweet
        HashMap<Integer, Float> norms = new HashMap<Integer, Float>();
        for (TopicVocabMessageCustome message : messages) {
            int vocabId = message.getSourceId();
            float[] tfidfs = message.getTFIDF();
            norms.put(vocabId, tfidfs[0]);
        }
        //if (norms.size() >= edgesthreshold) {
        ArrayList<Float> sims = new ArrayList<Float>();
        HashSet<Integer> neighbors = new HashSet<Integer>();
        for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
            int myid = (int) edge.getTargetVertexId().get();
            if (myid <= 0) // vocab vertex
                continue;
            float norm = norms.get(myid);
            float sim = edge.getValue().get();
            sim = sim / norm;
            sims.add(sim);
            if (sim >= epson)
                neighbors.add(myid);
            //edge.setValue(new FloatWritable(sim));
        }
        // Calculate the variance
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
        //int numMegaSetps = (int) (getSuperstep() / (MEGA_STEP - 1));
        AggregateMessageCustome msg =
                new AggregateMessageCustome((int) getId().get(), neighbors, var); //, numMegaSetps
        aggregate(VarianceAggregator.VAR_AGG, msg);
        //}
        voteToHalt();
        //}
    }
}
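The aggregate(VarianceAggregator.VAR_AGG, msg) call only works if the aggregator was registered ahead of time. In Giraph that is typically done from a MasterCompute; a hedged sketch of that wiring (the constant and aggregator class echo the example, but this master-compute class is an assumption, not the project's actual code):

import org.apache.giraph.master.DefaultMasterCompute;

public class TopicMasterCompute extends DefaultMasterCompute {
    @Override
    public void initialize() throws InstantiationException, IllegalAccessException {
        // Make VarianceAggregator available under its name before any superstep runs
        registerAggregator(VarianceAggregator.VAR_AGG, VarianceAggregator.class);
    }
}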
From source file:org.apache.giraph.examples.TopicVertexVocabBasedAggregateDynamic.java
License:Apache License
@Override
public void compute(Iterable<TopicVocabMessageCustome> messages) {
    if (getSuperstep() % BspServiceWorker.MEGA_STEP == 0) { // Each vocab will broadcast its neighbor IDs
        try {
            if (getId().get() <= 0) { // for vocab
                int edgesNum = getNumEdges();
                if (edgesNum == 0) {
                    voteToHalt();
                    removeVertexRequest(getId());
                }
                int[] neighbors = new int[edgesNum + 1];
                neighbors[0] = edgesNum;
                float[] tfidfs = new float[edgesNum + 1];
                tfidfs[0] = edgesNum;
                for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                    long vertexid = edge.getTargetVertexId().get();
                    int index = 1;
                    for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                        long neighbourid = edge2.getTargetVertexId().get();
                        float edgevalue = edge2.getValue().get();
                        float tfidf = (edgevalue * TWEET_SIZE) / edgesNum;
                        //if (vertexid == neighbourid)
                        //    continue;
                        tfidfs[index] = tfidf;
                        neighbors[index++] = (int) neighbourid;
                    }
                    sendMessage(edge.getTargetVertexId(),
                            new TopicVocabMessageCustome((int) getId().get(), neighbors, tfidfs));
                }
                //voteToHalt();
            }
        } catch (IOException ex) {
            System.out.println(ex.getMessage());
        }
    } else if (getSuperstep() % BspServiceWorker.MEGA_STEP == 1) {
        if (getId().get() > 0) { // for tweet
            setValue(new DoubleWritable(0.0));
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // The tweets that share at least one vocab with the current tweet
            HashMap<Integer, HashMap<Integer, Float>> neighborTweets =
                    new HashMap<Integer, HashMap<Integer, Float>>();
            int nodeid = (int) getId().get();
            float norm = 0f;
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                float[] msgtfidfs = message.getTFIDF();
                for (int i = 1; i < tweetId.length; i++) {
                    if (nodeid == tweetId[i]) {
                        norm += msgtfidfs[i] * msgtfidfs[i];
                        tfidfs.put(vocabId, msgtfidfs[i]);
                    } else {
                        HashMap<Integer, Float> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                        if (neighbourTweetsVocab == null) {
                            neighbourTweetsVocab = new HashMap<Integer, Float>();
                            neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                        }
                        neighbourTweetsVocab.put(vocabId, msgtfidfs[i]);
                    }
                }
            }
            norm = (float) Math.sqrt(norm);
            int nsize = neighborTweets.size();
            for (Integer neighbor : neighborTweets.keySet()) {
                float cosSim = 0.0f;
                HashMap<Integer, Float> currentTweetTFIDF = neighborTweets.get(neighbor);
                for (Integer mytfidf : tfidfs.keySet()) {
                    if (currentTweetTFIDF.containsKey(mytfidf)) {
                        cosSim += currentTweetTFIDF.get(mytfidf) * tfidfs.get(mytfidf);
                    }
                }
                cosSim = cosSim / norm;
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(cosSim)));
                int[] sentNeighbors = new int[1];
                float[] senttfidf = new float[1];
                senttfidf[0] = norm;
                sendMessage(new LongWritable(new Long(neighbor)),
                        new TopicVocabMessageCustome((int) getId().get(), sentNeighbors, senttfidf));
            }
        }
    } else if (getSuperstep() % BspServiceWorker.MEGA_STEP == 2) {
        if (getId().get() > 0) {
            HashMap<Integer, Float> norms = new HashMap<Integer, Float>();
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                float[] tfidfs = message.getTFIDF();
                norms.put(vocabId, tfidfs[0]);
            }
            ArrayList<Float> sims = new ArrayList<Float>();
            HashSet<Integer> neighbors = new HashSet<Integer>();
            for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
                int myid = (int) edge.getTargetVertexId().get();
                if (myid <= 0) // vocab vertex
                    continue;
                //if (norms.get(myid) == null) { ... } // debug check for missing norms
                float norm = norms.get(myid);
                float sim = edge.getValue().get();
                sim = sim / norm;
                sims.add(sim);
                if (sim >= epson)
                    neighbors.add(myid);
                //edge.setValue(new FloatWritable(sim));
            }
            // Calculate the variance
            float var = getVariance(sims);
            AggregateMessageCustome msg =
                    new AggregateMessageCustome((int) getId().get(), neighbors, var); //, numMegaSetps
            aggregate(VarianceAggregator.VAR_AGG, msg);
        }
        long numMegaSetps = getNumMegaSteps();
        if (numMegaSetps * BspServiceWorker.SHIFT_SIZE
                >= (BspServiceWorker.TOTAL_TWEETS - BspServiceWorker.WINDOW_SIZE)) {
            voteToHalt();
        } else {
            /* 1- Remove vertices that will change their identities
               2- Remove edges to the vertices that will change their identities */
            long stRange = getStRange(numMegaSetps);
            long endRange = getEndRange(stRange);
            if (getId().get() >= stRange && getId().get() <= endRange) {
                HashSet<LongWritable> ids = new HashSet<LongWritable>();
                for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                    ids.add(edge.getTargetVertexId());
                }
                for (LongWritable id : ids) {
                    removeEdges(id);
                }
            } else {
                // Remove edges from this vertex to the vertices that will change their identities
                if (getId().get() > 0) {
                    HashSet<LongWritable> ids = new HashSet<LongWritable>();
                    for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                        long targetVertexId = edge.getTargetVertexId().get();
                        if (targetVertexId > 0) {
                            ids.add(edge.getTargetVertexId());
                        }
                    }
                    for (LongWritable id : ids) {
                        removeEdges(id);
                    }
                } else {
                    HashSet<LongWritable> ids = new HashSet<LongWritable>();
                    for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                        long targetVertexId = edge.getTargetVertexId().get();
                        if (targetVertexId >= stRange && targetVertexId <= endRange) {
                            ids.add(edge.getTargetVertexId());
                        }
                    }
                    for (LongWritable id : ids) {
                        removeEdges(id);
                    }
                }
            }
        }
    } else if (getSuperstep() % BspServiceWorker.MEGA_STEP == 3) {
        long numMegaSetps = getNumMegaSteps() + 1;
        String hostname = getHostName();
        if ((hostname != null) && (!hostname.equalsIgnoreCase(""))) {
            String fileName = "/user/exp/ahmed/50k_sliding_20k_dynamic/" + numMegaSetps + "-" + getHostName();
            setHostName("");
            System.out.println("Trying to read batch from: " + fileName);
            loadNewTimeSLot(fileName);
        }
    }
}
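This dynamic variant mixes two mutation styles: direct calls such as removeEdges(...) and addEdge(...), which change the current vertex within the running superstep, and the buffered ...Request(...) calls (removeVertexRequest, addVertexRequest, addEdgeRequest), which as I understand Giraph's mutation model are resolved between supersteps, possibly on another worker. A hedged contrast using the same calls the example uses; the variable names are placeholders:

// Immediate, local mutation: the edges to `target` are gone within this superstep.
removeEdges(target);

// Deferred mutations: buffered and applied after the superstep ends.
removeVertexRequest(getId());
addEdgeRequest(new LongWritable(tweetId),
        EdgeFactory.create(new LongWritable(vocabId), new FloatWritable(tf)));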
From source file:org.apache.giraph.examples.TopicVertexVocabBasedAggregateDynamic.java
License:Apache License
private void loadNewTimeSLot(String filePath) {
    try {
        // Load new data
        Path ptRead = new Path(filePath);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream reader = fs.open(ptRead);
        // First line: tab-separated ids of new vocab vertices
        String line = reader.readLine();
        if (line.length() != 0) {
            String[] splits = line.split("\t");
            for (String split : splits) {
                // new vocab vertex
                addVertexRequest(new LongWritable(Long.parseLong(split)), new DoubleWritable(0));
            }
        }
        // Remaining lines: one tweet per line, followed by (vocabId, tf) pairs
        while ((line = reader.readLine()) != null) {
            String[] splits = line.split("\t");
            long tweetId = Long.parseLong(splits[0]);
            int ind = 1;
            int endInd = (splits.length - 1) / 2;
            for (int i = 0; i < endInd; i++) {
                long vocabId = Long.parseLong(splits[ind++]);
                float tf = Float.parseFloat(splits[ind++]);
                FloatWritable edgeVal = new FloatWritable(tf);
                // Add edge from the tweet to the vocab
                addEdgeRequest(new LongWritable(tweetId),
                        EdgeFactory.create(new LongWritable(vocabId), edgeVal));
                // Add edge from the vocab to the tweet
                addEdgeRequest(new LongWritable(vocabId),
                        EdgeFactory.create(new LongWritable(tweetId), edgeVal));
            }
        }
        reader.close();
    } catch (Exception e) {
        System.out.println("TopicVertexVocabBasedAggregateDynamic, loading batch file, " + e.getMessage());
    }
}
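A small API note: readLine() on FSDataInputStream is inherited from java.io.DataInputStream, where it has been deprecated since JDK 1.1 because it mishandles character encodings. Wrapping the stream in a BufferedReader is the usual replacement; a minimal sketch of the same read loop under that assumption:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

FileSystem fs = FileSystem.get(new Configuration());
try (BufferedReader reader =
        new BufferedReader(new InputStreamReader(fs.open(new Path(filePath))))) {
    String line;
    while ((line = reader.readLine()) != null) {
        // same per-line parsing as loadNewTimeSLot above
    }
}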
From source file:org.apache.giraph.examples.TopicVertexVocabBasedOptimizedAggregate.java
License:Apache License
@Override
public void compute(Iterable<TopicVocabMessageCustome> messages) {
    if (getSuperstep() == 0) { // Each vocab will broadcast its neighbor IDs
        if (getValue().equals(new DoubleWritable(0))) { // 0 for vocab
            int edgesNum = getNumEdges();
            int[] neighbors = new int[edgesNum + 1];
            neighbors[0] = edgesNum;
            float[] tfidfs = new float[edgesNum + 1];
            tfidfs[0] = edgesNum;
            for (Edge<LongWritable, FloatWritable> edge : getEdges()) {
                long vertexid = edge.getTargetVertexId().get();
                int index = 1;
                for (Edge<LongWritable, FloatWritable> edge2 : getEdges()) {
                    long neighbourid = edge2.getTargetVertexId().get();
                    float edgevalue = edge2.getValue().get();
                    float tfidf = (edgevalue * TWEET_SIZE) / edgesNum;
                    //if (vertexid == neighbourid)
                    //    continue;
                    tfidfs[index] = tfidf;
                    neighbors[index++] = (int) neighbourid;
                }
                sendMessage(edge.getTargetVertexId(),
                        new TopicVocabMessageCustome((int) getId().get(), neighbors, tfidfs));
            }
            voteToHalt();
        }
    } else if (getSuperstep() == 1) {
        if (getValue().equals(new DoubleWritable(1))) { // 1 for tweet
            setValue(new DoubleWritable(0.0));
            HashMap<Integer, Float> tfidfs = new HashMap<Integer, Float>();
            // The tweets that share at least one vocab with the current tweet
            HashMap<Integer, HashMap<Integer, Float>> neighborTweets =
                    new HashMap<Integer, HashMap<Integer, Float>>();
            int nodeid = (int) getId().get();
            float norm = 0f;
            for (TopicVocabMessageCustome message : messages) {
                int vocabId = message.getSourceId();
                int[] tweetId = message.getNeighborId();
                float[] msgtfidfs = message.getTFIDF();
                for (int i = 1; i < tweetId.length; i++) {
                    if (nodeid == tweetId[i]) {
                        norm += msgtfidfs[i] * msgtfidfs[i];
                        tfidfs.put(vocabId, msgtfidfs[i]);
                    } else {
                        HashMap<Integer, Float> neighbourTweetsVocab = neighborTweets.get(tweetId[i]);
                        if (neighbourTweetsVocab == null) {
                            neighbourTweetsVocab = new HashMap<Integer, Float>();
                            neighborTweets.put(tweetId[i], neighbourTweetsVocab);
                        }
                        neighbourTweetsVocab.put(vocabId, msgtfidfs[i]);
                    }
                }
            }
            norm = (float) Math.sqrt(norm);
            int nsize = neighborTweets.size();
            for (Integer neighbor : neighborTweets.keySet()) {
                float cosSim = 0.0f;
                HashMap<Integer, Float> currentTweetTFIDF = neighborTweets.get(neighbor);
                for (Integer mytfidf : tfidfs.keySet()) {
                    if (currentTweetTFIDF.containsKey(mytfidf)) {
                        cosSim += currentTweetTFIDF.get(mytfidf) * tfidfs.get(mytfidf);
                    }
                }
                cosSim = cosSim / norm;
                //if (nsize >= edgesthreshold) {
                addEdge(EdgeFactory.create(new LongWritable(neighbor), new FloatWritable(cosSim)));
                //}
                int[] sentNeighbors = new int[1];
                float[] senttfidf = new float[1];
                senttfidf[0] = norm;
                sendMessage(new LongWritable(new Long(neighbor)),
                        new TopicVocabMessageCustome((int) getId().get(), sentNeighbors, senttfidf));
            }
            //setValue(new DoubleWritable(newvar));
        }
    } else if (getSuperstep() == 2) {
        //if (getValue().equals(new DoubleWritable(1))) { // 1 for tweet
        // Calculate similarity with other tweets that share at least one vocab with the current tweet
        HashMap<Integer, Float> norms = new HashMap<Integer, Float>();
        for (TopicVocabMessageCustome message : messages) {
            int vocabId = message.getSourceId();
            float[] tfidfs = message.getTFIDF();
            norms.put(vocabId, tfidfs[0]);
        }
        //if (norms.size() >= edgesthreshold) {
        ArrayList<Float> sims = new ArrayList<Float>();
        HashSet<Integer> neighbors = new HashSet<Integer>();
        for (MutableEdge<LongWritable, FloatWritable> edge : getMutableEdges()) {
            int myid = (int) edge.getTargetVertexId().get();
            if (myid <= 0) // vocab vertex
                continue;
            float norm = norms.get(myid);
            float sim = edge.getValue().get();
            sim = sim / norm;
            sims.add(sim);
            if (sim >= epson)
                neighbors.add(myid);
            //edge.setValue(new FloatWritable(sim));
        }
        // Calculate the variance
        float var = getVariance(sims);
        setValue(new DoubleWritable(var));
        //int numMegaSetps = (int) (getSuperstep() / (MEGA_STEP - 1));
        //Collections.sort(neighbors);
        AggregateMessageCustome msg =
                new AggregateMessageCustome((int) getId().get(), neighbors, var); //, numMegaSetps
        aggregate(VarianceAggregatorOptimized.VAR_AGG, msg);
        //}
        voteToHalt();
        //}
    }
}