Example usage for org.apache.hadoop.io BytesWritable getBytes

List of usage examples for org.apache.hadoop.io BytesWritable getBytes

Introduction

On this page you can find example usages of org.apache.hadoop.io.BytesWritable.getBytes().

Prototype

@Override
public byte[] getBytes() 

Source Link

Document

Get the data backing the BytesWritable. The returned array may be longer than the stored data; only the first getLength() bytes are valid.

Usage

From source file:gov.va.research.red.ex.hadoop.REDExMapper.java

License:Apache License

/**
 * Splits one input record into a key part and a body part, runs the extractor over the
 * body and writes one output pair per match.
 *
 * @param key     ignored
 * @param value   raw record bytes; only the first {@code value.getLength()} bytes are data
 * @param context receives one {@code (Text, MatchedElementWritable)} pair per match
 * @throws IOException          propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void map(NullWritable key, BytesWritable value,
        Mapper<NullWritable, BytesWritable, Text, MatchedElementWritable>.Context context)
        throws IOException, InterruptedException {
    // getBytes() exposes the whole backing array, whose capacity can exceed the payload.
    // Decode only the valid prefix so stale/padding bytes never leak into the text.
    // NOTE(review): decoding still uses the platform default charset, as before — confirm
    // whether the records are UTF-8 and pin the charset explicitly if so.
    String valueStr = new String(value.getBytes(), 0, value.getLength());
    // Split into at most two pieces: [0] becomes the output key, [1] is the text to scan.
    String[] keyValue = EOL.split(valueStr, 2);
    Text newKey = new Text(keyValue[0]);
    List<MatchedElement> matches = rex.extract(keyValue[1]);
    if (matches != null) {
        for (MatchedElement me : matches) {
            context.write(newKey, new MatchedElementWritable(me));
        }
    }
}

From source file:hivemall.tools.compress.InflateUDF.java

License:Apache License

/**
 * Decompresses the binary first argument and returns the inflated bytes as a {@code Text}.
 * The codec and the result holder are created on first use and reused afterwards.
 *
 * @param arguments UDF arguments; only {@code arguments[0]} is read
 * @return the reused {@code Text} holding the decompressed bytes, or {@code null} when the
 *         argument is {@code null}
 * @throws HiveException if decompression fails
 */
@Override
public Text evaluate(DeferredObject[] arguments) throws HiveException {
    if (codec == null) {
        this.codec = new DeflateCodec(false, true);
    }

    final Object input = arguments[0].get();
    if (input == null) {
        return null;
    }

    final BytesWritable writable = binaryOI.getPrimitiveWritableObject(input);
    final byte[] inflated;
    try {
        // Only the first getLength() bytes of the backing array are passed to the codec.
        inflated = codec.decompress(writable.getBytes(), 0, writable.getLength());
    } catch (IOException e) {
        throw new HiveException("Failed to decompressed. Compressed data format is illegal.", e);
    }

    if (result != null) {
        result.set(inflated, 0, inflated.length);
    } else {
        result = new Text(inflated);
    }
    return result;
}

From source file:hivemall.tools.text.Base91UDF.java

License:Apache License

/**
 * Base91-encodes the binary first argument and returns the encoded bytes as a {@code Text}.
 * The output buffer and the result holder are created on first use and reused afterwards.
 *
 * @param arguments UDF arguments; only {@code arguments[0]} is read
 * @return the reused {@code Text} holding the encoded bytes, or {@code null} when the
 *         argument is {@code null}
 * @throws HiveException wrapping any {@code IOException} raised while encoding
 */
@Override
public Text evaluate(DeferredObject[] arguments) throws HiveException {
    // The buffer is prepared before the null check, so it is reset even for null input.
    if (outputBuf != null) {
        outputBuf.reset();
    } else {
        this.outputBuf = new FastByteArrayOutputStream(4096);
    }

    final Object rawArg = arguments[0].get();
    if (rawArg == null) {
        return null;
    }

    final BytesWritable source = binaryOI.getPrimitiveWritableObject(rawArg);
    try {
        // Encode only the first getLength() bytes of the backing array.
        Base91.encode(source.getBytes(), 0, source.getLength(), outputBuf);
    } catch (IOException e) {
        throw new HiveException(e);
    }

    if (result != null) {
        // Reuse path: hand over the buffer's internal array together with its used size.
        result.set(outputBuf.getInternalArray(), 0, outputBuf.size());
        return result;
    }
    // First call: construct the holder from the buffer's toByteArray() result.
    this.result = new Text(outputBuf.toByteArray());
    return result;
}

From source file:hydrograph.engine.cascading.scheme.avro.CustomCascadingToAvro.java

License:Apache License

/**
 * Wraps the valid bytes of a {@code BytesWritable} in a {@code Fixed} for the given schema.
 *
 * @param obj    value to convert; cast to {@code BytesWritable}
 * @param schema schema handed to the {@code Fixed} constructor
 * @return a new {@code Fixed} holding a copy of the first {@code getLength()} bytes
 */
protected static Object toAvroFixed(Object obj, Schema schema) {
    final BytesWritable writable = (BytesWritable) obj;
    // Copy just the valid prefix of the backing array.
    final byte[] payload = Arrays.copyOf(writable.getBytes(), writable.getLength());
    return new Fixed(schema, payload);
}

From source file:hydrograph.engine.cascading.scheme.avro.CustomCascadingToAvro.java

License:Apache License

/**
 * Converts a {@code BytesWritable} into a {@code ByteBuffer} over a copy of its valid bytes.
 *
 * @param obj value to convert; cast to {@code BytesWritable}
 * @return a {@code ByteBuffer} wrapping a copy of the first {@code getLength()} bytes
 */
protected static Object toAvroBytes(Object obj) {
    final BytesWritable writable = (BytesWritable) obj;
    // Copy just the valid prefix of the backing array.
    final byte[] payload = Arrays.copyOf(writable.getBytes(), writable.getLength());
    return ByteBuffer.wrap(payload);
}

From source file:in.dream_lab.goffish.giraph.examples.SingleSourceShortestPath.java

License:Apache License

/**
 * Decodes packed subgraph messages and relaxes the stored shortest distances.
 *
 * <p>Each message is read as a sequence of {@code (long sinkVertex, short sinkDistance)}
 * records. Reading of a message stops at the end of its data or when a sink vertex of
 * {@code -1} is read. Whenever a received distance is smaller than the stored one, the
 * stored distance is replaced and the vertex is added to {@code rootVertices}.
 *
 * @param packedSubGraphMessages incoming messages carrying the packed records
 * @param subgraph               subgraph whose value holds the distance map
 * @param rootVertices           receives every vertex whose distance was improved
 * @throws IOException if reading from a message payload fails
 */
private void unpackSubgraphMessages(Iterable<IMessage<LongWritable, BytesWritable>> packedSubGraphMessages,
        ISubgraph<ShortestPathSubgraphValue, LongWritable, NullWritable, LongWritable, NullWritable, LongWritable> subgraph,
        Set<IVertex<LongWritable, NullWritable, LongWritable, NullWritable>> rootVertices) throws IOException {
    ShortestPathSubgraphValue subgraphValue = subgraph.getSubgraphValue();
    int unpackedMessageCount = 0;
    int packedMessageCount = 0;
    for (IMessage<LongWritable, BytesWritable> iMessage : packedSubGraphMessages) {
        packedMessageCount++;
        BytesWritable subgraphMessageValue = iMessage.getMessage();
        // getBytes() returns the whole backing array, which can be larger than the payload.
        // Bound the reader to getLength() so padding is never parsed as records.
        ExtendedByteArrayDataInput dataInput = new ExtendedByteArrayDataInput(
                subgraphMessageValue.getBytes(), 0, subgraphMessageValue.getLength());
        while (!dataInput.endOfInput()) {
            long sinkVertex = dataInput.readLong();
            if (sinkVertex == -1) {
                // -1 marks the end of the records in this message.
                break;
            }
            unpackedMessageCount++;
            short sinkDistance = dataInput.readShort();
            short distance = subgraphValue.shortestDistanceMap.get(sinkVertex);
            if (sinkDistance < distance) {
                subgraphValue.shortestDistanceMap.put(sinkVertex, sinkDistance);
                rootVertices.add(subgraph.getVertexById(new LongWritable(sinkVertex)));
            }
        }
    }
    LOG.info("Superstep,SubgraphId,unpackedReceivedMessageCount,packedReceivedMessageCount,rootVertices:"
            + getSuperstep() + "," + subgraph.getSubgraphId() + "," + unpackedMessageCount + ","
            + packedMessageCount + "," + rootVertices.size());
}

From source file:in.dream_lab.goffish.giraph.examples.SubgraphSingleSourceShortestPathWithWeights.java

License:Apache License

/**
 * Decodes packed subgraph messages and relaxes the stored shortest distances.
 *
 * <p>Each message is read as a sequence of {@code (long sinkVertex, short sinkDistance)}
 * records. Reading of a message stops at the end of its data or when a sink vertex of
 * {@code -1} is read. Whenever a received distance is smaller than the stored one, the
 * stored distance is replaced and the vertex is added to {@code rootVertices}.
 *
 * @param packedSubGraphMessages incoming messages carrying the packed records
 * @param subgraph               subgraph whose value holds the distance map
 * @param rootVertices           receives every vertex whose distance was improved
 * @throws IOException if reading from a message payload fails
 */
private void unpackSubgraphMessages(Iterable<IMessage<LongWritable, BytesWritable>> packedSubGraphMessages,
        ISubgraph<ShortestPathSubgraphValue, LongWritable, DoubleWritable, LongWritable, NullWritable, LongWritable> subgraph,
        Set<IVertex<LongWritable, DoubleWritable, LongWritable, NullWritable>> rootVertices)
        throws IOException {
    ShortestPathSubgraphValue subgraphValue = subgraph.getSubgraphValue();
    for (IMessage<LongWritable, BytesWritable> iMessage : packedSubGraphMessages) {
        BytesWritable subgraphMessageValue = iMessage.getMessage();
        // getBytes() returns the whole backing array, which can be larger than the payload.
        // Bound the reader to getLength() so padding is never parsed as records.
        ExtendedByteArrayDataInput dataInput = new ExtendedByteArrayDataInput(
                subgraphMessageValue.getBytes(), 0, subgraphMessageValue.getLength());
        while (!dataInput.endOfInput()) {
            long sinkVertex = dataInput.readLong();
            if (sinkVertex == -1) {
                // -1 marks the end of the records in this message.
                break;
            }
            short sinkDistance = dataInput.readShort();
            short distance = subgraphValue.shortestDistanceMap.get(sinkVertex);
            if (sinkDistance < distance) {
                subgraphValue.shortestDistanceMap.put(sinkVertex, sinkDistance);
                rootVertices.add(subgraph.getVertexById(new LongWritable(sinkVertex)));
            }
        }
    }
}

From source file:in.dream_lab.goffish.giraph.examples.SubgraphTriangleCount.java

License:Apache License

/**
 * Decodes packed messages into per-target lists of {@code (messageId, sourceId)} pairs.
 *
 * <p>Each message is read as a sequence of three longs — target id, message id, source id —
 * and reading stops when a target id of {@code -1} is read.
 *
 * @param subgraphMessages incoming messages carrying the packed triples
 * @param ids              map from target id to the pairs received for it; lists are
 *                         created on demand and appended to
 * @throws IOException if reading from a message payload fails, including running out of
 *                     data before the {@code -1} terminator
 */
void unpackMessages(Iterable<IMessage<LongWritable, BytesWritable>> subgraphMessages,
        Map<Long, List<Pair<Long, Long>>> ids) throws IOException {
    for (IMessage<LongWritable, BytesWritable> messageItem : subgraphMessages) {
        BytesWritable message = messageItem.getMessage();
        // getBytes() returns the whole backing array, which can be larger than the payload.
        // Bound the reader to getLength() so padding is never parsed as triples.
        ExtendedByteArrayDataInput dataInput = new ExtendedByteArrayDataInput(
                message.getBytes(), 0, message.getLength());
        // Primitive locals avoid boxing on every read; values are boxed only when stored.
        long targetId;
        while ((targetId = dataInput.readLong()) != -1) {
            long messageId = dataInput.readLong();
            long sourceId = dataInput.readLong();
            List<Pair<Long, Long>> idPairs = ids.get(targetId);
            if (idPairs == null) {
                idPairs = new LinkedList<Pair<Long, Long>>();
                ids.put(targetId, idPairs);
            }
            idPairs.add(new Pair<Long, Long>(messageId, sourceId));
        }
    }
}

From source file:in.dream_lab.goffish.hama.DenseGraphLongTextAdjacencyListReader.java

License:Apache License

/**
 * Reads this peer's adjacency-list input, builds local and remote vertices, forms the
 * partition's subgraphs, and exchanges vertex-to-subgraph information with other peers.
 *
 * <p>Phases, separated by {@code peer.sync()} barriers:
 * <ol>
 *   <li>Parse every input record into a vertex with its out-edges and store it in
 *       {@code vertexMap}.</li>
 *   <li>Create a {@code RemoteVertex} for every edge sink that is not in {@code vertexMap}.</li>
 *   <li>Form the subgraphs, then broadcast, per subgraph, its id, its local vertex count and
 *       its local vertex ids.</li>
 *   <li>Process received messages: record subgraph-to-partition mappings, and set the
 *       subgraph id on any known remote vertex named in a broadcast.</li>
 * </ol>
 *
 * @return the subgraphs of the partition built for this peer
 * @throws IOException          declared; presumably raised by peer I/O — confirm
 * @throws SyncException        declared; presumably raised by {@code peer.sync()} — confirm
 * @throws InterruptedException declared; presumably raised by {@code peer.sync()} — confirm
 */
@Override
public List<ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable>> getSubgraphs()
        throws IOException, SyncException, InterruptedException {

    /* Used for logging */
    Runtime runtime = Runtime.getRuntime();
    // Divisor used to report memory figures in megabytes.
    int mb = 1024 * 1024;

    LOG.info("Free Memory in Reader: " + runtime.freeMemory() / mb + " Total Memory: "
            + runtime.totalMemory() / mb);

    LOG.info("Free Memory after Reaching reader " + Runtime.getRuntime().freeMemory());

    KeyValuePair<Writable, Writable> pair;
    // Running number of edges created by this reader; also the low bits of each edge id.
    long edgeCount = 0;

    vertexMap = Maps.newHashMap();
    remoteVertexMap = Maps.newHashMap();

    LOG.info("SETUP Starting Free Memory: " + runtime.freeMemory() / mb + " Total Memory: "
            + runtime.totalMemory() / mb);
    LOG.info("SETUP Starting " + peer.getPeerIndex() + " Memory: " + Runtime.getRuntime().freeMemory());
    // Incremented once per record below but never read in this method.
    int count = -1;

    while ((pair = peer.readNext()) != null) {
        count++;
        // NOTE: Confirm that data starts from value and not from key.
        String stringInput = pair.getValue().toString();
        // Whitespace-separated tokens: [0] is the vertex id, the rest are sink vertex ids.
        String vertexValue[] = stringInput.split("\\s+");

        LongWritable vertexID = new LongWritable(Long.parseLong(vertexValue[0]));
        List<IEdge<E, LongWritable, LongWritable>> _adjList = new ArrayList<IEdge<E, LongWritable, LongWritable>>();

        for (int j = 1; j < vertexValue.length; j++) {
            LongWritable sinkID = new LongWritable(Long.parseLong(vertexValue[j]));
            // Edge id = local edge counter OR'ed with the peer index shifted left by 32 bits.
            LongWritable edgeID = new LongWritable(edgeCount++ | (((long) peer.getPeerIndex()) << 32));
            Edge<E, LongWritable, LongWritable> e = new Edge<E, LongWritable, LongWritable>(edgeID, sinkID);
            _adjList.add(e);
        }
        vertexMap.put(vertexID.get(), createVertexInstance(vertexID, _adjList));

    }

    LOG.info("Number of Vertices: " + vertexMap.size() + " Edges: " + edgeCount);

    LOG.info("Free Memory: " + runtime.freeMemory() / mb + " Total Memory: " + runtime.totalMemory() / mb);
    System.gc();
    peer.sync();
    LOG.info("Creating Remote Vertex Objects");

    /* Create remote vertex objects. */
    for (IVertex<V, E, LongWritable, LongWritable> vertex : vertexMap.values()) {
        for (IEdge<E, LongWritable, LongWritable> e : vertex.getOutEdges()) {
            LongWritable sinkID = e.getSinkVertexId();
            // A sink that was not read by this peer is represented by a RemoteVertex.
            if (!vertexMap.containsKey(sinkID.get())) {
                IRemoteVertex<V, E, LongWritable, LongWritable, LongWritable> sink = new RemoteVertex<>(sinkID);
                remoteVertexMap.put(sinkID.get(), sink);
            }
        }
    }

    peer.sync();

    Partition<S, V, E, LongWritable, LongWritable, LongWritable> partition = new Partition<>(
            peer.getPeerIndex());

    LOG.info("Calling formSubgraph()");

    formSubgraphs(partition);

    //clearing used memory
    vertexMap = null;

    LOG.info("Done with formSubgraph()");
    /*
     * Tell other partitions our Vertex Ids and their subgraphIDs
     */
    Message<LongWritable, LongWritable> question = new Message<LongWritable, LongWritable>();
    ControlMessage controlInfo = new ControlMessage();
    controlInfo.setTransmissionType(IControlMessage.TransmissionType.BROADCAST);
    question.setControlInfo(controlInfo);
    /*
     * Message format being sent: subgraphID1 count1 vertex1 vertex2 ... subgraphID2 count2 vertex1 vertex2 ...
     */
    for (ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable> subgraphs : partition.getSubgraphs()) {
        controlInfo.addextraInfo(Longs.toByteArray(subgraphs.getSubgraphId().get()));
        controlInfo.addextraInfo(Longs.toByteArray(subgraphs.getLocalVertexCount()));
        for (IVertex<V, E, LongWritable, LongWritable> v : subgraphs.getLocalVertices()) {
            byte vertexIDbytes[] = Longs.toByteArray(v.getVertexId().get());
            controlInfo.addextraInfo(vertexIDbytes);
        }
    }
    sendToAllPartitions(question);

    LOG.info("Completed first superstep in reader");
    System.out.println("Before 2nd Superstep " + (runtime.totalMemory() - runtime.freeMemory()) / mb);
    peer.sync();
    LOG.info("Started superstep 2 in reader");

    Message<LongWritable, LongWritable> msg;
    // NOTE(review): replyMessages is never used in this method.
    Map<Integer, List<Message<LongWritable, LongWritable>>> replyMessages = new HashMap<Integer, List<Message<LongWritable, LongWritable>>>();
    // Receiving 1 message per partition
    while ((msg = (Message<LongWritable, LongWritable>) peer.getCurrentMessage()) != null) {
        /*
         * Subgraph Partition mapping broadcast Format of received message:
         * partitionID subgraphID1 subgraphID2 ...
         */
        if (msg.getMessageType() == Message.MessageType.SUBGRAPH) {
            Iterable<BytesWritable> subgraphList = ((ControlMessage) msg.getControlInfo()).getExtraInfo();

            // First element is decoded as an int partition id from the start of its backing array.
            Integer partitionID = Ints.fromByteArray(subgraphList.iterator().next().getBytes());

            // Every remaining element is decoded as a long subgraph id owned by that partition.
            for (BytesWritable subgraphListElement : Iterables.skip(subgraphList, 1)) {
                LongWritable subgraphID = new LongWritable(Longs.fromByteArray(subgraphListElement.getBytes()));
                subgraphPartitionMap.put((K) subgraphID, partitionID);
            }
            continue;
        }

        /*
         * receiving vertices to set Remote Vertex subgraph id.
         */
        Iterator<BytesWritable> remoteVertexQuery = ((ControlMessage) msg.getControlInfo()).getExtraInfo()
                .iterator();

        // Consumes groups of: subgraph id, vertex count, then that many vertex ids.
        while (remoteVertexQuery.hasNext()) {
            Long subgraphID = Longs.fromByteArray(remoteVertexQuery.next().getBytes());
            Long vertexCount = Longs.fromByteArray(remoteVertexQuery.next().getBytes());
            for (long i = 0; i < vertexCount; i++) {
                Long remoteVertexID = Longs.fromByteArray(remoteVertexQuery.next().getBytes());
                // Only vertices this peer holds as remote are updated; other ids are skipped.
                if (remoteVertexMap.containsKey(remoteVertexID)) {
                    RemoteVertex<V, E, LongWritable, LongWritable, LongWritable> sink = (RemoteVertex<V, E, LongWritable, LongWritable, LongWritable>) remoteVertexMap
                            .get(remoteVertexID);
                    sink.setSubgraphID(new LongWritable(subgraphID));
                }
            }
        }
    }

    LOG.info("Completed 2nd superstep in reader");
    LOG.info("Reader finished");
    return partition.getSubgraphs();
}

From source file:in.dream_lab.goffish.hama.LongTextAdjacencyListReader.java

License:Apache License

/**
 * Reads this peer's adjacency-list input, builds local and remote vertices, forms the
 * partition's subgraphs, and resolves the subgraph id of every remote vertex by querying
 * the other peers.
 *
 * <p>Phases:
 * <ol>
 *   <li>Parse every input record into a {@code Vertex} with its out-edges.</li>
 *   <li>Create a {@code RemoteVertex} for every edge sink that is not in {@code vertexMap},
 *       then form the subgraphs.</li>
 *   <li>Broadcast this peer's index followed by the ids of its remote vertices, then
 *       {@code peer.sync()}.</li>
 *   <li>Process received messages: record subgraph-to-partition mappings, and answer each
 *       query with (sink id, subgraph id) pairs for the vertices found in
 *       {@code vertexSubgraphMap}; then {@code peer.sync()}.</li>
 *   <li>Apply the received replies to the local remote vertices.</li>
 * </ol>
 *
 * @return the subgraphs of the partition built for this peer
 * @throws IOException          declared; presumably raised by peer I/O — confirm
 * @throws SyncException        declared; presumably raised by {@code peer.sync()} — confirm
 * @throws InterruptedException declared; presumably raised by {@code peer.sync()} — confirm
 */
@Override
public List<ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable>> getSubgraphs()
        throws IOException, SyncException, InterruptedException {

    KeyValuePair<Writable, Writable> pair;
    // Running number of edges created by this reader; also the low bits of each edge id.
    long edgeCount = 0;

    vertexMap = Maps.newHashMap();
    remoteVertexMap = Maps.newHashMap();

    while ((pair = peer.readNext()) != null) {
        String stringInput = pair.getValue().toString();
        // Whitespace-separated tokens: [0] is the vertex id, the rest are sink vertex ids.
        String vertexValue[] = stringInput.split("\\s+");

        LongWritable vertexID = new LongWritable(Long.parseLong(vertexValue[0]));
        Vertex<V, E, LongWritable, LongWritable> vertex = new Vertex<V, E, LongWritable, LongWritable>(
                vertexID);

        for (int j = 1; j < vertexValue.length; j++) {
            LongWritable sinkID = new LongWritable(Long.parseLong(vertexValue[j]));
            // Edge id = local edge counter OR'ed with the peer index shifted left by 32 bits.
            LongWritable edgeID = new LongWritable(edgeCount++ | (((long) peer.getPeerIndex()) << 32));
            Edge<E, LongWritable, LongWritable> e = new Edge<E, LongWritable, LongWritable>(edgeID, sinkID);
            vertex.addEdge(e);
        }

        vertexMap.put(vertexID.get(), vertex);

    }

    /* Create remote vertex objects. */
    for (IVertex<V, E, LongWritable, LongWritable> vertex : vertexMap.values()) {
        for (IEdge<E, LongWritable, LongWritable> e : vertex.getOutEdges()) {
            LongWritable sinkID = e.getSinkVertexId();
            // A sink that was not read by this peer is represented by a RemoteVertex.
            if (!vertexMap.containsKey(sinkID.get())) {
                IRemoteVertex<V, E, LongWritable, LongWritable, LongWritable> sink = new RemoteVertex<>(sinkID);
                remoteVertexMap.put(sinkID.get(), sink);
            }
        }
    }

    Partition<S, V, E, LongWritable, LongWritable, LongWritable> partition = new Partition<>(
            peer.getPeerIndex());

    formSubgraphs(partition);

    /*
     * Ask Remote vertices to send their subgraph IDs. Requires 2 supersteps
     * because the graph is directed
     */
    Message<LongWritable, LongWritable> question = new Message<LongWritable, LongWritable>();
    ControlMessage controlInfo = new ControlMessage();
    controlInfo.setTransmissionType(IControlMessage.TransmissionType.BROADCAST);
    question.setControlInfo(controlInfo);
    /*
     * Message format being sent: partitionID remotevertex1 remotevertex2 ...
     */
    byte partitionIDbytes[] = Ints.toByteArray(peer.getPeerIndex());
    controlInfo.addextraInfo(partitionIDbytes);
    for (IVertex<V, E, LongWritable, LongWritable> v : remoteVertexMap.values()) {
        byte vertexIDbytes[] = Longs.toByteArray(v.getVertexId().get());
        controlInfo.addextraInfo(vertexIDbytes);
    }
    sendToAllPartitions(question);

    peer.sync();

    Message<LongWritable, LongWritable> msg;
    // NOTE(review): replyMessages is never used in this method.
    Map<Integer, List<Message<LongWritable, LongWritable>>> replyMessages = new HashMap<Integer, List<Message<LongWritable, LongWritable>>>();
    // Receiving 1 message per partition
    while ((msg = (Message<LongWritable, LongWritable>) peer.getCurrentMessage()) != null) {
        /*
         * Subgraph Partition mapping broadcast Format of received message:
         * partitionID subgraphID1 subgraphID2 ...
         */
        if (msg.getMessageType() == Message.MessageType.SUBGRAPH) {
            Iterable<BytesWritable> subgraphList = ((ControlMessage) msg.getControlInfo()).getExtraInfo();

            // First element is decoded as an int partition id from the start of its backing array.
            Integer partitionID = Ints.fromByteArray(subgraphList.iterator().next().getBytes());

            // Every remaining element is decoded as a long subgraph id owned by that partition.
            for (BytesWritable subgraphListElement : Iterables.skip(subgraphList, 1)) {
                LongWritable subgraphID = new LongWritable(Longs.fromByteArray(subgraphListElement.getBytes()));
                subgraphPartitionMap.put((K) subgraphID, partitionID);
            }
            continue;
        }

        /*
         * receiving query to find subgraph id Remote Vertex
         */
        Iterable<BytesWritable> RemoteVertexQuery = ((ControlMessage) msg.getControlInfo()).getExtraInfo();

        /*
         * Reply format : sinkID1 subgraphID1 sinkID2 subgraphID2 ...
         */
        Message<LongWritable, LongWritable> subgraphIDReply = new Message<LongWritable, LongWritable>();
        // The controlInfo variable is reused here for the reply; a fresh ControlMessage is assigned.
        controlInfo = new ControlMessage();
        controlInfo.setTransmissionType(IControlMessage.TransmissionType.NORMAL);
        subgraphIDReply.setControlInfo(controlInfo);

        // First element of the query is the index of the peer that asked.
        Integer sinkPartition = Ints.fromByteArray(RemoteVertexQuery.iterator().next().getBytes());
        // Set once at least one queried vertex has been resolved; gates sending the reply.
        boolean hasAVertex = false;
        for (BytesWritable remoteVertex : Iterables.skip(RemoteVertexQuery, 1)) {
            LongWritable sinkID = new LongWritable(Longs.fromByteArray(remoteVertex.getBytes()));
            LongWritable sinkSubgraphID = vertexSubgraphMap.get(sinkID);
            // In case this partition does not have the vertex
            /*
             * Case 1 : If vertex does not exist Case 2 : If vertex exists but is
             * remote, then its subgraphID is null
             */
            if (sinkSubgraphID == null) {
                continue;
            }
            hasAVertex = true;
            byte sinkIDbytes[] = Longs.toByteArray(sinkID.get());
            controlInfo.addextraInfo(sinkIDbytes);
            byte subgraphIDbytes[] = Longs.toByteArray(sinkSubgraphID.get());
            controlInfo.addextraInfo(subgraphIDbytes);
        }
        // No reply is sent when none of the queried vertices were resolved here.
        if (hasAVertex) {
            peer.send(peer.getPeerName(sinkPartition.intValue()), (Message<K, M>) subgraphIDReply);
        }
    }

    peer.sync();

    // Replies arrive as consecutive (sink id, subgraph id) pairs.
    while ((msg = (Message<LongWritable, LongWritable>) peer.getCurrentMessage()) != null) {
        Iterable<BytesWritable> remoteVertexReply = ((ControlMessage) msg.getControlInfo()).getExtraInfo();

        Iterator<BytesWritable> queryResponse = remoteVertexReply.iterator();
        while (queryResponse.hasNext()) {
            LongWritable sinkID = new LongWritable(Longs.fromByteArray(queryResponse.next().getBytes()));
            LongWritable remoteSubgraphID = new LongWritable(
                    Longs.fromByteArray(queryResponse.next().getBytes()));
            RemoteVertex<V, E, LongWritable, LongWritable, LongWritable> sink = (RemoteVertex<V, E, LongWritable, LongWritable, LongWritable>) remoteVertexMap
                    .get(sinkID.get());
            // Only checked when assertions are enabled; otherwise a missing entry fails on the next line.
            assert (sink != null);
            sink.setSubgraphID(remoteSubgraphID);
        }
    }
    return partition.getSubgraphs();
}