List of usage examples for the constructor of org.apache.hadoop.io.DoubleWritable
public DoubleWritable(double value)
From source file:org.trustedanalytics.atk.giraph.algorithms.lda.CVB0LDAComputation.java
License:Apache License
/** * Initialize vertex/edges, collect graph statistics and send out messages * * @param vertex of the graph/*from ww w . j a v a 2s . c o m*/ */ private void initialize(Vertex<LdaVertexId, LdaVertexData, LdaEdgeData> vertex) { // initialize vertex vector, i.e., the theta for doc and phi for word in LDA double[] vertexValues = new double[config.numTopics()]; vertex.getValue().setLdaResult(new DenseVector(vertexValues)); // initialize edge vector, i.e., the gamma in LDA Random rand1 = new Random(vertex.getId().seed()); long seed1 = rand1.nextInt(); double maxDelta = 0d; double sumWeights = 0d; for (Edge<LdaVertexId, LdaEdgeData> edge : vertex.getMutableEdges()) { double weight = edge.getValue().getWordCount(); // generate the random seed for this edge Random rand2 = new Random(edge.getTargetVertexId().seed()); long seed2 = rand2.nextInt(); long seed = seed1 + seed2; Random rand = new Random(seed); double[] edgeValues = new double[config.numTopics()]; for (int i = 0; i < config.numTopics(); i++) { edgeValues[i] = rand.nextDouble(); } Vector vector = new DenseVector(edgeValues); vector = vector.normalize(1d); edge.getValue().setVector(vector); // find the max delta among all edges double delta = vector.norm(1d) / config.numTopics(); if (delta > maxDelta) { maxDelta = delta; } // the sum of weights from all edges sumWeights += weight; } // update vertex value updateVertex(vertex); // aggregate max delta value aggregate(MAX_DELTA, new DoubleWritable(maxDelta)); // collect graph statistics if (vertex.getId().isDocument()) { aggregate(SUM_DOC_VERTEX_COUNT, new LongWritable(1)); } else { aggregate(SUM_OCCURRENCE_COUNT, new DoubleWritable(sumWeights)); aggregate(SUM_WORD_VERTEX_COUNT, new LongWritable(1)); } // send out messages LdaMessage newMessage = new LdaMessage(vertex.getId().copy(), vertex.getValue().getLdaResult()); sendMessageToAllEdges(vertex, newMessage); }
From source file:org.trustedanalytics.atk.giraph.algorithms.lda.CVB0LDAComputation.java
License:Apache License
/** * Update edge value according to vertex and messages * * @param vertex of the graph/*from w ww. j av a2 s.c om*/ * @param map of type HashMap */ private void updateEdge(Vertex<LdaVertexId, LdaVertexData, LdaEdgeData> vertex, HashMap<LdaVertexId, Vector> map) { Vector vector = vertex.getValue().getLdaResult(); double maxDelta = 0d; for (Edge<LdaVertexId, LdaEdgeData> edge : vertex.getMutableEdges()) { Vector gamma = edge.getValue().getVector(); LdaVertexId id = edge.getTargetVertexId(); if (map.containsKey(id)) { Vector otherVector = map.get(id); Vector newGamma = null; if (vertex.getId().isDocument()) { newGamma = vector.minus(gamma).plus(config.alpha()) .times(otherVector.minus(gamma).plus(config.beta())) .times(nk.minus(gamma).plus(numWords * config.beta()).assign(Functions.INV)); } else { newGamma = vector.minus(gamma).plus(config.beta()) .times(otherVector.minus(gamma).plus(config.alpha())) .times(nk.minus(gamma).plus(numWords * config.beta()).assign(Functions.INV)); } newGamma = newGamma.normalize(1d); double delta = gamma.minus(newGamma).norm(1d) / config.numTopics(); if (delta > maxDelta) { maxDelta = delta; } // update edge vector edge.getValue().setVector(newGamma); } else { // this happens when you don't have your Vertex Id's being setup correctly throw new IllegalArgumentException( String.format("Vertex ID %s: A message is mis-matched.", vertex.getId())); } } aggregate(MAX_DELTA, new DoubleWritable(maxDelta)); }
From source file:org.trustedanalytics.atk.giraph.algorithms.lda.CVB0LDAComputation.java
License:Apache License
/**
 * Computes this vertex's contribution to the cost and adds it to the
 * {@code SUM_COST} aggregator. Document vertices contribute nothing and return
 * immediately.
 *
 * @param vertex of the graph
 * @param messages of type iterable (not read by this method)
 * @param map of type HashMap; looked up by each edge's target vertex id
 * @throws IllegalArgumentException if an edge's target vertex has no entry in {@code map}
 */
private void evaluateCost(Vertex<LdaVertexId, LdaVertexData, LdaEdgeData> vertex,
        Iterable<LdaMessage> messages, HashMap<LdaVertexId, Vector> map) {
    if (vertex.getId().isDocument()) {
        return;
    }

    // This vertex's side of the dot product: (result + beta) * 1 / (nk + numWords * beta).
    Vector invertedTotals = nk.plus(numWords * config.beta()).assign(Functions.INV);
    Vector ownTerm = vertex.getValue().getLdaResult().plus(config.beta()).times(invertedTotals);

    double totalCost = 0d;
    for (Edge<LdaVertexId, LdaEdgeData> edge : vertex.getEdges()) {
        double wordCount = edge.getValue().getWordCount();
        LdaVertexId neighbourId = edge.getTargetVertexId();

        if (!map.containsKey(neighbourId)) {
            throw new IllegalArgumentException(
                    String.format("Vertex ID %s: A message is mis-matched", vertex.getId().getValue()));
        }

        // Neighbour's side: (received vector + alpha), normalized with normalize(1d).
        Vector neighbourTerm = map.get(neighbourId).plus(config.alpha()).normalize(1d);
        totalCost -= wordCount * Math.log(ownTerm.dot(neighbourTerm));
    }

    aggregate(SUM_COST, new DoubleWritable(totalCost));
}
From source file:org.trustedanalytics.atk.giraph.algorithms.lda.GiraphLdaComputation.java
License:Apache License
/** * Initialize vertex/edges, collect graph statistics and send out messages * * @param vertex of the graph/*ww w . j a v a 2 s.c o m*/ */ private void initialize(Vertex<LdaVertexId, LdaVertexData, LdaEdgeData> vertex) { // initialize vertex vector, i.e., the theta for doc and phi for word in LDA double[] vertexValues = new double[config.numTopics()]; vertex.getValue().setLdaResult(new DenseVector(vertexValues)); Vector updatedVector = vertex.getValue().getLdaResult().clone().assign(0d); // initialize edge vector, i.e., the gamma in LDA Random rand1 = new Random(vertex.getId().seed()); long seed1 = rand1.nextInt(); double maxDelta = 0d; double sumWeights = 0d; for (Edge<LdaVertexId, LdaEdgeData> edge : vertex.getMutableEdges()) { double weight = edge.getValue().getWordCount(); // generate the random seed for this edge Random rand2 = new Random(edge.getTargetVertexId().seed()); long seed2 = rand2.nextInt(); long seed = seed1 + seed2; Random rand = new Random(seed); double[] edgeValues = new double[config.numTopics()]; for (int i = 0; i < config.numTopics(); i++) { edgeValues[i] = rand.nextDouble(); } Vector vector = new DenseVector(edgeValues); vector = vector.normalize(1d); edge.getValue().setVector(vector); // find the max delta among all edges double delta = vector.norm(1d) / config.numTopics(); if (delta > maxDelta) { maxDelta = delta; } // the sum of weights from all edges sumWeights += weight; updatedVector = updateVector(updatedVector, edge); } // update vertex value vertex.getValue().setLdaResult(updatedVector); ; // aggregate max delta value aggregateWord(vertex); aggregate(MAX_DELTA, new DoubleWritable(maxDelta)); // collect graph statistics if (vertex.getId().isDocument()) { aggregate(SUM_DOC_VERTEX_COUNT, new LongWritable(1)); } else { aggregate(SUM_OCCURRENCE_COUNT, new DoubleWritable(sumWeights)); aggregate(SUM_WORD_VERTEX_COUNT, new LongWritable(1)); } // send out messages LdaMessage newMessage = new LdaMessage(vertex.getId().copy(), 
vertex.getValue().getLdaResult()); sendMessageToAllEdges(vertex, newMessage); }
From source file:org.trustedanalytics.atk.giraph.algorithms.lda.GiraphLdaComputation.java
License:Apache License
/** * Update vertex and outgoing edge values using current vertex values and messages * * @param vertex of the graph/*from www. j av a 2 s . c o m*/ * @param map Map of vertices */ private void updateVertex(Vertex<LdaVertexId, LdaVertexData, LdaEdgeData> vertex, HashMap<LdaVertexId, Vector> map) { Vector vector = vertex.getValue().getLdaResult(); Vector updatedVector = vertex.getValue().getLdaResult().clone().assign(0d); double maxDelta = 0d; for (Edge<LdaVertexId, LdaEdgeData> edge : vertex.getMutableEdges()) { Vector gamma = edge.getValue().getVector(); LdaVertexId id = edge.getTargetVertexId(); if (map.containsKey(id)) { Vector otherVector = map.get(id); Vector newGamma = null; if (vertex.getId().isDocument()) { newGamma = vector.minus(gamma).plus(config.alpha()) .times(otherVector.minus(gamma).plus(config.beta())) .times(nk.minus(gamma).plus(numWords * config.beta()).assign(Functions.INV)); } else { newGamma = vector.minus(gamma).plus(config.beta()) .times(otherVector.minus(gamma).plus(config.alpha())) .times(nk.minus(gamma).plus(numWords * config.beta()).assign(Functions.INV)); } newGamma = newGamma.normalize(1d); double delta = gamma.minus(newGamma).norm(1d) / config.numTopics(); if (delta > maxDelta) { maxDelta = delta; } // update edge vector edge.getValue().setVector(newGamma); } else { // this happens when you don't have your Vertex Id's being setup correctly throw new IllegalArgumentException( String.format("Vertex ID %s: A message is mis-matched.", vertex.getId())); } updatedVector = updateVector(updatedVector, edge); } vertex.getValue().setLdaResult(updatedVector); aggregateWord(vertex); aggregate(MAX_DELTA, new DoubleWritable(maxDelta)); }
From source file:org.trustedanalytics.atk.giraph.algorithms.lp.LabelPropagationComputation.java
License:Apache License
@Override public void compute(Vertex<LongWritable, VertexData4LPWritable, DoubleWritable> vertex, Iterable<IdWithVectorMessage> messages) throws IOException { long superStep = getSuperstep(); if (superStep == 0) { initializeVertexEdges(vertex);/*from w ww .ja v a 2s.c o m*/ vertex.voteToHalt(); } else if (superStep <= maxSupersteps) { VertexData4LPWritable vertexValue = vertex.getValue(); Vector prior = vertexValue.getPriorVector(); Vector posterior = vertexValue.getPosteriorVector(); double degree = vertexValue.getDegree(); // collect messages sent to this vertex HashMap<Long, Vector> map = new HashMap(); for (IdWithVectorMessage message : messages) { map.put(message.getData(), message.getVector()); } // Update belief and calculate cost double hi = prior.getQuick(0); double fi = posterior.getQuick(0); double crossSum = 0d; Vector newBelief = posterior.clone().assign(0d); for (Edge<LongWritable, DoubleWritable> edge : vertex.getEdges()) { double weight = edge.getValue().get(); if (weight <= 0d) { throw new IllegalArgumentException( "Vertex ID: " + vertex.getId() + "has an edge with negative or zero value"); } long targetVertex = edge.getTargetVertexId().get(); if (map.containsKey(targetVertex)) { Vector tempVector = map.get(targetVertex); newBelief = newBelief.plus(tempVector.times(weight)); double fj = tempVector.getQuick(0); crossSum += weight * fi * fj; } } double cost = degree * ((1 - lambda) * (Math.pow(fi, 2) - crossSum) + 0.5 * lambda * Math.pow((fi - hi), 2)); aggregate(SUM_COST, new DoubleWritable(cost)); // Update posterior if the vertex was not processed if (vertexValue.wasLabeled() == false) { newBelief = (newBelief.times(1 - lambda).plus(prior.times(lambda))).normalize(1d); vertexValue.setPosteriorVector(newBelief); } // Send out messages if not the last step if (superStep != maxSupersteps) { IdWithVectorMessage newMessage = new IdWithVectorMessage(vertex.getId().get(), vertexValue.getPosteriorVector()); sendMessageToAllEdges(vertex, newMessage); } } 
vertex.voteToHalt(); }
From source file:org.trustedanalytics.atk.giraph.io.BigDataEdgesTest.java
License:Apache License
@Test public void testBigDataEdges() { OutEdges<LongWritable, DoubleWritable> edges = instantiateOutEdges(BigDataEdges.class); // Initial edges list contains parallel edges. List<Edge<LongWritable, DoubleWritable>> initialEdges = Lists.newArrayList( EdgeFactory.create(new LongWritable(1), new DoubleWritable(1)), EdgeFactory.create(new LongWritable(2), new DoubleWritable(2)), EdgeFactory.create(new LongWritable(3), new DoubleWritable(3)), EdgeFactory.create(new LongWritable(2), new DoubleWritable(20))); edges.initialize(initialEdges);/*from ww w . j av a 2s. c o m*/ // The parallel edges should still be there. assertEquals(4, edges.size()); // Adding a parallel edge should increase the number of edges. edges.add(EdgeFactory.create(new LongWritable(3), new DoubleWritable(30))); assertEquals(5, edges.size()); // Removing edges pointing to a given vertex should remove all parallel // edges. edges.remove(new LongWritable(2)); assertEquals(3, edges.size()); }
From source file:org.wonderbee.elasticsearch.hive.ElasticSearchSerDe.java
License:Apache License
/**
 * Recursively converts an arbitrary object into the appropriate writable.
 *
 * <p>Strings, Longs, Integers, Doubles, Floats and Booleans map to their
 * matching writable. A {@link Map} becomes a {@code MapWritable} whose keys are
 * the {@code toString()} of the original keys and whose values are converted
 * recursively. A non-empty {@link List} becomes an {@code ArrayWritable} whose
 * declared value class is the class of the first converted element. Everything
 * else, including {@code null} and empty lists, becomes {@code NullWritable}.
 *
 * @param thing the value to convert; may be {@code null}
 * @return the writable representation of {@code thing}, never {@code null}
 */
private Writable toWritable(Object thing) {
    if (thing instanceof String) {
        return new Text((String) thing);
    }
    if (thing instanceof Long) {
        return new LongWritable((Long) thing);
    }
    if (thing instanceof Integer) {
        return new IntWritable((Integer) thing);
    }
    if (thing instanceof Double) {
        return new DoubleWritable((Double) thing);
    }
    if (thing instanceof Float) {
        return new FloatWritable((Float) thing);
    }
    if (thing instanceof Boolean) {
        return new BooleanWritable((Boolean) thing);
    }
    if (thing instanceof Map) {
        // Wildcard view instead of an unchecked cast to Map<String, Object>;
        // keys are only ever used through toString().
        MapWritable result = new MapWritable();
        for (Map.Entry<?, ?> entry : ((Map<?, ?>) thing).entrySet()) {
            result.put(new Text(entry.getKey().toString()), toWritable(entry.getValue()));
        }
        return result;
    }
    if (thing instanceof List) {
        List<?> elements = (List<?>) thing;
        if (!elements.isEmpty()) {
            Writable[] listOfThings = new Writable[elements.size()];
            int index = 0;
            for (Object element : elements) {
                listOfThings[index++] = toWritable(element);
            }
            // Reuse the already-converted first element for the value class rather
            // than converting it a second time.
            return new ArrayWritable(listOfThings[0].getClass(), listOfThings);
        }
        // Empty lists fall through to NullWritable.
    }
    return NullWritable.get();
}