Example usage for the org.apache.hadoop.io.DoubleWritable constructor DoubleWritable(double)

List of usage examples for the org.apache.hadoop.io.DoubleWritable constructor DoubleWritable(double)

Introduction

On this page you can find example usages of the org.apache.hadoop.io.DoubleWritable constructor DoubleWritable(double).

Prototype

public DoubleWritable(double value) 

Source Link

Usage

From source file:org.trustedanalytics.atk.giraph.algorithms.lda.CVB0LDAComputation.java

License:Apache License

/**
 * Initialize vertex/edges, collect graph statistics and send out messages
 *
 * @param vertex of the graph/*from ww  w  . j  a v  a  2s  . c  o  m*/
 */
private void initialize(Vertex<LdaVertexId, LdaVertexData, LdaEdgeData> vertex) {

    // initialize vertex vector, i.e., the theta for doc and phi for word in LDA
    double[] vertexValues = new double[config.numTopics()];
    vertex.getValue().setLdaResult(new DenseVector(vertexValues));

    // initialize edge vector, i.e., the gamma in LDA
    Random rand1 = new Random(vertex.getId().seed());
    long seed1 = rand1.nextInt();
    double maxDelta = 0d;
    double sumWeights = 0d;
    for (Edge<LdaVertexId, LdaEdgeData> edge : vertex.getMutableEdges()) {
        double weight = edge.getValue().getWordCount();

        // generate the random seed for this edge
        Random rand2 = new Random(edge.getTargetVertexId().seed());
        long seed2 = rand2.nextInt();
        long seed = seed1 + seed2;
        Random rand = new Random(seed);
        double[] edgeValues = new double[config.numTopics()];
        for (int i = 0; i < config.numTopics(); i++) {
            edgeValues[i] = rand.nextDouble();
        }
        Vector vector = new DenseVector(edgeValues);
        vector = vector.normalize(1d);
        edge.getValue().setVector(vector);
        // find the max delta among all edges
        double delta = vector.norm(1d) / config.numTopics();
        if (delta > maxDelta) {
            maxDelta = delta;
        }
        // the sum of weights from all edges
        sumWeights += weight;
    }
    // update vertex value
    updateVertex(vertex);
    // aggregate max delta value
    aggregate(MAX_DELTA, new DoubleWritable(maxDelta));

    // collect graph statistics
    if (vertex.getId().isDocument()) {
        aggregate(SUM_DOC_VERTEX_COUNT, new LongWritable(1));
    } else {
        aggregate(SUM_OCCURRENCE_COUNT, new DoubleWritable(sumWeights));
        aggregate(SUM_WORD_VERTEX_COUNT, new LongWritable(1));
    }

    // send out messages
    LdaMessage newMessage = new LdaMessage(vertex.getId().copy(), vertex.getValue().getLdaResult());
    sendMessageToAllEdges(vertex, newMessage);
}

From source file:org.trustedanalytics.atk.giraph.algorithms.lda.CVB0LDAComputation.java

License:Apache License

/**
 * Update edge value according to vertex and messages
 *
 * @param vertex of the graph/*from w  ww.  j av a2  s.c om*/
 * @param map of type HashMap
 */
private void updateEdge(Vertex<LdaVertexId, LdaVertexData, LdaEdgeData> vertex,
        HashMap<LdaVertexId, Vector> map) {
    Vector vector = vertex.getValue().getLdaResult();

    double maxDelta = 0d;
    for (Edge<LdaVertexId, LdaEdgeData> edge : vertex.getMutableEdges()) {
        Vector gamma = edge.getValue().getVector();
        LdaVertexId id = edge.getTargetVertexId();
        if (map.containsKey(id)) {
            Vector otherVector = map.get(id);
            Vector newGamma = null;
            if (vertex.getId().isDocument()) {
                newGamma = vector.minus(gamma).plus(config.alpha())
                        .times(otherVector.minus(gamma).plus(config.beta()))
                        .times(nk.minus(gamma).plus(numWords * config.beta()).assign(Functions.INV));
            } else {
                newGamma = vector.minus(gamma).plus(config.beta())
                        .times(otherVector.minus(gamma).plus(config.alpha()))
                        .times(nk.minus(gamma).plus(numWords * config.beta()).assign(Functions.INV));
            }
            newGamma = newGamma.normalize(1d);
            double delta = gamma.minus(newGamma).norm(1d) / config.numTopics();
            if (delta > maxDelta) {
                maxDelta = delta;
            }
            // update edge vector
            edge.getValue().setVector(newGamma);
        } else {
            // this happens when you don't have your Vertex Id's being setup correctly
            throw new IllegalArgumentException(
                    String.format("Vertex ID %s: A message is mis-matched.", vertex.getId()));
        }
    }
    aggregate(MAX_DELTA, new DoubleWritable(maxDelta));
}

From source file:org.trustedanalytics.atk.giraph.algorithms.lda.CVB0LDAComputation.java

License:Apache License

/**
 * Evaluate cost according to vertex and messages.
 *
 * <p>Document vertices contribute nothing and return immediately. For any other vertex the cost
 * is the negated sum, over all edges, of the edge word count times the logarithm of the dot
 * product between this vertex's scaled result vector and the normalized vector found in
 * {@code map} for the edge target. The total is reported to the {@code SUM_COST} aggregator.
 *
 * @param vertex of the graph
 * @param messages of type iterable (not read by this method)
 * @param map of type HashMap
 * @throws IllegalArgumentException if {@code map} has no entry for the target id of an edge
 */
private void evaluateCost(Vertex<LdaVertexId, LdaVertexData, LdaEdgeData> vertex, Iterable<LdaMessage> messages,
        HashMap<LdaVertexId, Vector> map) {

    final boolean documentVertex = vertex.getId().isDocument();
    if (documentVertex) {
        return;
    }

    // scale the vertex result: (result + beta) * 1 / (nk + numWords * beta), element-wise
    // NOTE(review): nk and numWords are fields declared outside this excerpt
    Vector shifted = vertex.getValue().getLdaResult().plus(config.beta());
    Vector inverseTotals = nk.plus(numWords * config.beta()).assign(Functions.INV);
    Vector scaled = shifted.times(inverseTotals);

    double total = 0d;
    for (Edge<LdaVertexId, LdaEdgeData> outEdge : vertex.getEdges()) {
        double wordCount = outEdge.getValue().getWordCount();
        LdaVertexId targetId = outEdge.getTargetVertexId();

        if (!map.containsKey(targetId)) {
            throw new IllegalArgumentException(
                    String.format("Vertex ID %s: A message is mis-matched", vertex.getId().getValue()));
        }

        Vector neighbour = map.get(targetId).plus(config.alpha()).normalize(1d);
        total -= wordCount * Math.log(scaled.dot(neighbour));
    }

    aggregate(SUM_COST, new DoubleWritable(total));
}

From source file:org.trustedanalytics.atk.giraph.algorithms.lda.GiraphLdaComputation.java

License:Apache License

/**
 * Initialize vertex/edges, collect graph statistics and send out messages
 *
 * @param vertex of the graph/*ww w .  j  a v a 2 s.c o  m*/
 */
private void initialize(Vertex<LdaVertexId, LdaVertexData, LdaEdgeData> vertex) {

    // initialize vertex vector, i.e., the theta for doc and phi for word in LDA
    double[] vertexValues = new double[config.numTopics()];
    vertex.getValue().setLdaResult(new DenseVector(vertexValues));
    Vector updatedVector = vertex.getValue().getLdaResult().clone().assign(0d);
    // initialize edge vector, i.e., the gamma in LDA
    Random rand1 = new Random(vertex.getId().seed());
    long seed1 = rand1.nextInt();
    double maxDelta = 0d;
    double sumWeights = 0d;
    for (Edge<LdaVertexId, LdaEdgeData> edge : vertex.getMutableEdges()) {
        double weight = edge.getValue().getWordCount();

        // generate the random seed for this edge
        Random rand2 = new Random(edge.getTargetVertexId().seed());
        long seed2 = rand2.nextInt();
        long seed = seed1 + seed2;
        Random rand = new Random(seed);
        double[] edgeValues = new double[config.numTopics()];
        for (int i = 0; i < config.numTopics(); i++) {
            edgeValues[i] = rand.nextDouble();
        }
        Vector vector = new DenseVector(edgeValues);
        vector = vector.normalize(1d);
        edge.getValue().setVector(vector);
        // find the max delta among all edges
        double delta = vector.norm(1d) / config.numTopics();
        if (delta > maxDelta) {
            maxDelta = delta;
        }
        // the sum of weights from all edges
        sumWeights += weight;
        updatedVector = updateVector(updatedVector, edge);
    }
    // update vertex value
    vertex.getValue().setLdaResult(updatedVector);
    ;
    // aggregate max delta value
    aggregateWord(vertex);
    aggregate(MAX_DELTA, new DoubleWritable(maxDelta));

    // collect graph statistics
    if (vertex.getId().isDocument()) {
        aggregate(SUM_DOC_VERTEX_COUNT, new LongWritable(1));
    } else {
        aggregate(SUM_OCCURRENCE_COUNT, new DoubleWritable(sumWeights));
        aggregate(SUM_WORD_VERTEX_COUNT, new LongWritable(1));
    }

    // send out messages
    LdaMessage newMessage = new LdaMessage(vertex.getId().copy(), vertex.getValue().getLdaResult());
    sendMessageToAllEdges(vertex, newMessage);
}

From source file:org.trustedanalytics.atk.giraph.algorithms.lda.GiraphLdaComputation.java

License:Apache License

/**
 * Update vertex and outgoing edge values using current vertex values and messages
 *
 * @param vertex of the graph/*from   www.  j av a  2 s . c  o  m*/
 * @param map    Map of vertices
 */
private void updateVertex(Vertex<LdaVertexId, LdaVertexData, LdaEdgeData> vertex,
        HashMap<LdaVertexId, Vector> map) {
    Vector vector = vertex.getValue().getLdaResult();
    Vector updatedVector = vertex.getValue().getLdaResult().clone().assign(0d);
    double maxDelta = 0d;
    for (Edge<LdaVertexId, LdaEdgeData> edge : vertex.getMutableEdges()) {
        Vector gamma = edge.getValue().getVector();
        LdaVertexId id = edge.getTargetVertexId();
        if (map.containsKey(id)) {
            Vector otherVector = map.get(id);
            Vector newGamma = null;
            if (vertex.getId().isDocument()) {
                newGamma = vector.minus(gamma).plus(config.alpha())
                        .times(otherVector.minus(gamma).plus(config.beta()))
                        .times(nk.minus(gamma).plus(numWords * config.beta()).assign(Functions.INV));
            } else {
                newGamma = vector.minus(gamma).plus(config.beta())
                        .times(otherVector.minus(gamma).plus(config.alpha()))
                        .times(nk.minus(gamma).plus(numWords * config.beta()).assign(Functions.INV));
            }
            newGamma = newGamma.normalize(1d);
            double delta = gamma.minus(newGamma).norm(1d) / config.numTopics();
            if (delta > maxDelta) {
                maxDelta = delta;
            }
            // update edge vector
            edge.getValue().setVector(newGamma);
        } else {
            // this happens when you don't have your Vertex Id's being setup correctly
            throw new IllegalArgumentException(
                    String.format("Vertex ID %s: A message is mis-matched.", vertex.getId()));
        }

        updatedVector = updateVector(updatedVector, edge);
    }

    vertex.getValue().setLdaResult(updatedVector);

    aggregateWord(vertex);
    aggregate(MAX_DELTA, new DoubleWritable(maxDelta));
}

From source file:org.trustedanalytics.atk.giraph.algorithms.lp.LabelPropagationComputation.java

License:Apache License

/**
 * Runs one superstep for the given vertex.
 *
 * <p>Superstep 0 only initializes the vertex edges. In supersteps 1 through {@code maxSupersteps}
 * the method gathers the incoming vectors by sender id, accumulates a weighted belief over the
 * edges whose target sent a message, reports a cost to the {@code SUM_COST} aggregator, updates
 * the posterior when {@code wasLabeled()} is false, and sends the posterior to all edges unless
 * this is the last superstep. The vertex votes to halt at the end of every call.
 *
 * @param vertex   vertex being processed
 * @param messages messages delivered to this vertex for the current superstep
 * @throws IOException declared by the overridden method
 * @throws IllegalArgumentException if an edge carries a weight that is zero or negative
 */
@Override
public void compute(Vertex<LongWritable, VertexData4LPWritable, DoubleWritable> vertex,
        Iterable<IdWithVectorMessage> messages) throws IOException {
    long superStep = getSuperstep();

    if (superStep == 0) {
        // halting is requested once, at the end of the method, for every superstep
        initializeVertexEdges(vertex);
    } else if (superStep <= maxSupersteps) {
        VertexData4LPWritable vertexValue = vertex.getValue();
        Vector prior = vertexValue.getPriorVector();
        Vector posterior = vertexValue.getPosteriorVector();
        double degree = vertexValue.getDegree();

        // collect messages sent to this vertex
        HashMap<Long, Vector> map = new HashMap<Long, Vector>();
        for (IdWithVectorMessage message : messages) {
            map.put(message.getData(), message.getVector());
        }

        // Update belief and calculate cost
        double hi = prior.getQuick(0);
        double fi = posterior.getQuick(0);
        double crossSum = 0d;
        Vector newBelief = posterior.clone().assign(0d);

        for (Edge<LongWritable, DoubleWritable> edge : vertex.getEdges()) {
            double weight = edge.getValue().get();
            if (weight <= 0d) {
                throw new IllegalArgumentException(
                        "Vertex ID: " + vertex.getId() + " has an edge with negative or zero value");
            }
            long targetVertex = edge.getTargetVertexId().get();
            // edges whose target sent no message are skipped
            if (map.containsKey(targetVertex)) {
                Vector tempVector = map.get(targetVertex);
                newBelief = newBelief.plus(tempVector.times(weight));
                double fj = tempVector.getQuick(0);
                crossSum += weight * fi * fj;
            }
        }

        double cost = degree
                * ((1 - lambda) * (Math.pow(fi, 2) - crossSum) + 0.5 * lambda * Math.pow((fi - hi), 2));
        aggregate(SUM_COST, new DoubleWritable(cost));

        // Update posterior if the vertex was not processed
        if (!vertexValue.wasLabeled()) {
            newBelief = (newBelief.times(1 - lambda).plus(prior.times(lambda))).normalize(1d);
            vertexValue.setPosteriorVector(newBelief);
        }

        // Send out messages if not the last step
        if (superStep != maxSupersteps) {
            IdWithVectorMessage newMessage = new IdWithVectorMessage(vertex.getId().get(),
                    vertexValue.getPosteriorVector());
            sendMessageToAllEdges(vertex, newMessage);
        }
    }

    vertex.voteToHalt();
}

From source file:org.trustedanalytics.atk.giraph.io.BigDataEdgesTest.java

License:Apache License

/**
 * Checks that {@code BigDataEdges} keeps parallel edges on initialization and insertion, and
 * that removing by target id drops every edge pointing at that target.
 */
@Test
public void testBigDataEdges() {
    // Seed list deliberately contains two edges that both point at vertex 2.
    List<Edge<LongWritable, DoubleWritable>> seedEdges = Lists.newArrayList(
            EdgeFactory.create(new LongWritable(1), new DoubleWritable(1)),
            EdgeFactory.create(new LongWritable(2), new DoubleWritable(2)),
            EdgeFactory.create(new LongWritable(3), new DoubleWritable(3)),
            EdgeFactory.create(new LongWritable(2), new DoubleWritable(20)));

    OutEdges<LongWritable, DoubleWritable> outEdges = instantiateOutEdges(BigDataEdges.class);
    outEdges.initialize(seedEdges);

    // Initialization must not collapse the parallel edges.
    assertEquals(4, outEdges.size());

    // A further parallel edge (second edge to vertex 3) grows the collection by one.
    Edge<LongWritable, DoubleWritable> extraParallelEdge =
            EdgeFactory.create(new LongWritable(3), new DoubleWritable(30));
    outEdges.add(extraParallelEdge);
    assertEquals(5, outEdges.size());

    // Removing by target id takes out both edges that point at vertex 2.
    LongWritable duplicatedTarget = new LongWritable(2);
    outEdges.remove(duplicatedTarget);
    assertEquals(3, outEdges.size());
}

From source file:org.wonderbee.elasticsearch.hive.ElasticSearchSerDe.java

License:Apache License

/**
 * Recursively converts an arbitrary object into the appropriate writable. Please enlighten me if there is an existing
 * method for doing this.
 *
 * <p>Strings, Longs, Integers, Doubles, Floats and Booleans map to their matching writable. A
 * {@link Map} becomes a {@code MapWritable} whose keys are the {@code toString()} of the original
 * keys (any key type is accepted) and whose values are converted recursively. A non-empty
 * {@link List} becomes an {@code ArrayWritable} whose value class is the class of the first
 * converted element. Everything else, including {@code null} and an empty list, becomes
 * {@code NullWritable}.
 *
 * @param thing object to convert, may be {@code null}
 * @return the writable representation, never {@code null}
 * @throws NullPointerException if a map contains a {@code null} key
 */
private Writable toWritable(Object thing) {
    if (thing instanceof String) {
        return new Text((String) thing);
    }
    if (thing instanceof Long) {
        return new LongWritable((Long) thing);
    }
    if (thing instanceof Integer) {
        return new IntWritable((Integer) thing);
    }
    if (thing instanceof Double) {
        return new DoubleWritable((Double) thing);
    }
    if (thing instanceof Float) {
        return new FloatWritable((Float) thing);
    }
    if (thing instanceof Boolean) {
        return new BooleanWritable((Boolean) thing);
    }
    if (thing instanceof Map) {
        MapWritable result = new MapWritable();
        // wildcard view: keys are only stringified, so they need not be Strings
        for (Map.Entry<?, ?> entry : ((Map<?, ?>) thing).entrySet()) {
            result.put(new Text(entry.getKey().toString()), toWritable(entry.getValue()));
        }
        return result;
    }
    if (thing instanceof List) {
        List<?> list = (List<?>) thing;
        if (!list.isEmpty()) {
            Writable[] listOfThings = new Writable[list.size()];
            for (int i = 0; i < listOfThings.length; i++) {
                listOfThings[i] = toWritable(list.get(i));
            }
            // the first converted element decides the declared value class of the array
            return new ArrayWritable(listOfThings[0].getClass(), listOfThings);
        }
    }
    return NullWritable.get();
}