List of usage examples for org.apache.hadoop.io.BytesWritable#getBytes()
@Override public byte[] getBytes()
From source file:gov.va.research.red.ex.hadoop.REDExMapper.java
License:Apache License
/**
 * Decodes one binary record into text, splits it into a key part and a body part, runs the
 * extractor over the body and writes one output pair per matched element.
 *
 * @param key     unused input key
 * @param value   the raw record bytes
 * @param context the mapper context that receives the (key, match) pairs
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(NullWritable key, BytesWritable value,
        Mapper<NullWritable, BytesWritable, Text, MatchedElementWritable>.Context context)
        throws IOException, InterruptedException {
    // BytesWritable.getBytes() returns the whole backing buffer, which can be longer than the
    // record; only the first getLength() bytes are valid. Decoding the full buffer would append
    // stale or padding bytes to the text.
    // NOTE(review): this still decodes with the platform default charset, as the original did.
    String valueStr = new String(value.getBytes(), 0, value.getLength());

    // Split into at most two parts: the new key and the text to run extraction on.
    // NOTE(review): a record that produces a single part makes keyValue[1] throw
    // ArrayIndexOutOfBoundsException - confirm every record contains the separator.
    String[] keyValue = EOL.split(valueStr, 2);
    Text newKey = new Text(keyValue[0]);

    List<MatchedElement> matches = rex.extract(keyValue[1]);
    if (matches == null) {
        return;
    }
    for (MatchedElement me : matches) {
        context.write(newKey, new MatchedElementWritable(me));
    }
}
From source file:hivemall.tools.compress.InflateUDF.java
License:Apache License
/**
 * Decompresses the binary value passed as the first argument and returns it as {@link Text}.
 *
 * <p>The codec and the result object are created on first use and reused on later calls.
 *
 * @param arguments the UDF arguments; only the first one is read
 * @return the decompressed bytes wrapped in the reused {@code Text}, or {@code null} when the
 *         first argument is {@code null}
 * @throws HiveException if the codec fails to decompress the input
 */
@Override
public Text evaluate(DeferredObject[] arguments) throws HiveException {
    if (codec == null) {
        codec = new DeflateCodec(false, true);
    }

    final Object input = arguments[0].get();
    if (input == null) {
        return null;
    }

    final BytesWritable writable = binaryOI.getPrimitiveWritableObject(input);
    final byte[] inflated;
    try {
        // Only the first getLength() bytes of the backing buffer hold valid data.
        inflated = codec.decompress(writable.getBytes(), 0, writable.getLength());
    } catch (IOException e) {
        throw new HiveException("Failed to decompressed. Compressed data format is illegal.", e);
    }

    if (result != null) {
        result.set(inflated, 0, inflated.length);
        return result;
    }
    result = new Text(inflated);
    return result;
}
From source file:hivemall.tools.text.Base91UDF.java
License:Apache License
@Override public Text evaluate(DeferredObject[] arguments) throws HiveException { if (outputBuf == null) { this.outputBuf = new FastByteArrayOutputStream(4096); } else {//w w w . ja v a2s. c om outputBuf.reset(); } Object arg0 = arguments[0].get(); if (arg0 == null) { return null; } BytesWritable input = binaryOI.getPrimitiveWritableObject(arg0); final byte[] inputBytes = input.getBytes(); final int len = input.getLength(); try { Base91.encode(inputBytes, 0, len, outputBuf); } catch (IOException e) { throw new HiveException(e); } if (result == null) { byte[] outputBytes = outputBuf.toByteArray(); this.result = new Text(outputBytes); } else { byte[] outputBytes = outputBuf.getInternalArray(); int outputSize = outputBuf.size(); result.set(outputBytes, 0, outputSize); } return result; }
From source file:hydrograph.engine.cascading.scheme.avro.CustomCascadingToAvro.java
License:Apache License
/**
 * Converts a {@code BytesWritable} into a {@code Fixed} value for the given schema.
 *
 * @param obj    the value to convert; cast to {@code BytesWritable}
 * @param schema the schema passed to the {@code Fixed} constructor
 * @return a new {@code Fixed} holding a copy of the first {@code getLength()} bytes
 */
protected static Object toAvroFixed(Object obj, Schema schema) {
    final BytesWritable writable = (BytesWritable) obj;
    // Copy only the valid prefix; the backing buffer may be longer than the data.
    final byte[] payload = Arrays.copyOfRange(writable.getBytes(), 0, writable.getLength());
    return new Fixed(schema, payload);
}
From source file:hydrograph.engine.cascading.scheme.avro.CustomCascadingToAvro.java
License:Apache License
/**
 * Converts a {@code BytesWritable} into a {@link ByteBuffer}.
 *
 * @param obj the value to convert; cast to {@code BytesWritable}
 * @return a buffer wrapping a copy of the first {@code getLength()} bytes
 */
protected static Object toAvroBytes(Object obj) {
    final BytesWritable writable = (BytesWritable) obj;
    // Copy only the valid prefix; the backing buffer may be longer than the data.
    final byte[] payload = Arrays.copyOfRange(writable.getBytes(), 0, writable.getLength());
    return ByteBuffer.wrap(payload);
}
From source file:in.dream_lab.goffish.giraph.examples.SingleSourceShortestPath.java
License:Apache License
private void unpackSubgraphMessages(Iterable<IMessage<LongWritable, BytesWritable>> packedSubGraphMessages, ISubgraph<ShortestPathSubgraphValue, LongWritable, NullWritable, LongWritable, NullWritable, LongWritable> subgraph, Set<IVertex<LongWritable, NullWritable, LongWritable, NullWritable>> rootVertices) throws IOException { ShortestPathSubgraphValue subgraphValue = subgraph.getSubgraphValue(); int unpackedMessageCount = 0; int packedMessageCount = 0; for (IMessage<LongWritable, BytesWritable> iMessage : packedSubGraphMessages) { packedMessageCount++;/*from w w w.j a v a 2 s .co m*/ BytesWritable subgraphMessageValue = iMessage.getMessage(); ExtendedByteArrayDataInput dataInput = new ExtendedByteArrayDataInput(subgraphMessageValue.getBytes()); while (!dataInput.endOfInput()) { long sinkVertex = dataInput.readLong(); if (sinkVertex == -1) { break; } unpackedMessageCount++; short sinkDistance = dataInput.readShort(); //LOG.info("Test, Sink vertex received: " + sinkVertex); // SubgraphVertex<LongWritable, LongWritable, LongWritable, NullWritable, NullWritable> currentVertex = vertices.get(new LongWritable(sinkVertex)); //LOG.info("Test, Current vertex object: " + currentVertex); //LOG.info("Test, Current vertex: " + currentVertex.getVertexId()); short distance = subgraphValue.shortestDistanceMap.get(sinkVertex); if (sinkDistance < distance) { subgraphValue.shortestDistanceMap.put(sinkVertex, sinkDistance); rootVertices.add(subgraph.getVertexById(new LongWritable(sinkVertex))); } } } LOG.info("Superstep,SubgraphId,unpackedReceivedMessageCount,packedReceivedMessageCount,rootVertices:" + getSuperstep() + "," + subgraph.getSubgraphId() + "," + unpackedMessageCount + "," + packedMessageCount + "," + rootVertices.size()); }
From source file:in.dream_lab.goffish.giraph.examples.SubgraphSingleSourceShortestPathWithWeights.java
License:Apache License
private void unpackSubgraphMessages(Iterable<IMessage<LongWritable, BytesWritable>> packedSubGraphMessages, ISubgraph<ShortestPathSubgraphValue, LongWritable, DoubleWritable, LongWritable, NullWritable, LongWritable> subgraph, Set<IVertex<LongWritable, DoubleWritable, LongWritable, NullWritable>> rootVertices) throws IOException { ShortestPathSubgraphValue subgraphValue = subgraph.getSubgraphValue(); for (IMessage<LongWritable, BytesWritable> iMessage : packedSubGraphMessages) { BytesWritable subgraphMessageValue = iMessage.getMessage(); ExtendedByteArrayDataInput dataInput = new ExtendedByteArrayDataInput(subgraphMessageValue.getBytes()); while (!dataInput.endOfInput()) { long sinkVertex = dataInput.readLong(); if (sinkVertex == -1) { break; }/*from w w w. j a v a2 s . c om*/ short sinkDistance = dataInput.readShort(); //LOG.info("Test, Sink vertex received: " + sinkVertex); // SubgraphVertex<LongWritable, LongWritable, LongWritable, NullWritable, NullWritable> currentVertex = vertices.get(new LongWritable(sinkVertex)); //LOG.info("Test, Current vertex object: " + currentVertex); //LOG.info("Test, Current vertex: " + currentVertex.getVertexId()); short distance = subgraphValue.shortestDistanceMap.get(sinkVertex); if (sinkDistance < distance) { subgraphValue.shortestDistanceMap.put(sinkVertex, sinkDistance); rootVertices.add(subgraph.getVertexById(new LongWritable(sinkVertex))); } } } }
From source file:in.dream_lab.goffish.giraph.examples.SubgraphTriangleCount.java
License:Apache License
/**
 * Unpacks the received messages into {@code ids}, grouping (message id, source id) pairs by
 * target id.
 *
 * <p>Each payload is read as a sequence of (target id, message id, source id) triples of longs.
 * A target id of {@code -1} ends the payload.
 *
 * @param subgraphMessages the received messages, one packed payload each
 * @param ids              the map to fill; a list is created for each target id not yet present
 * @throws IOException if a payload cannot be read, including when it ends before the
 *                     {@code -1} terminator is reached
 */
void unpackMessages(Iterable<IMessage<LongWritable, BytesWritable>> subgraphMessages,
        Map<Long, List<Pair<Long, Long>>> ids) throws IOException {
    for (IMessage<LongWritable, BytesWritable> messageItem : subgraphMessages) {
        BytesWritable message = messageItem.getMessage();

        // getBytes() exposes the whole backing buffer; restrict the reader to the valid
        // [0, getLength()) range so bytes past the payload are never parsed as ids.
        ExtendedByteArrayDataInput dataInput =
                new ExtendedByteArrayDataInput(message.getBytes(), 0, message.getLength());

        // Primitive longs avoid boxing on every read; values are boxed only when stored.
        long targetId;
        while ((targetId = dataInput.readLong()) != -1) {
            long messageId = dataInput.readLong();
            long sourceId = dataInput.readLong();

            List<Pair<Long, Long>> idPairs = ids.get(targetId);
            if (idPairs == null) {
                idPairs = new LinkedList<Pair<Long, Long>>();
                ids.put(targetId, idPairs);
            }
            idPairs.add(new Pair<Long, Long>(messageId, sourceId));
        }
    }
}
From source file:in.dream_lab.goffish.hama.DenseGraphLongTextAdjacencyListReader.java
License:Apache License
@Override public List<ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable>> getSubgraphs() throws IOException, SyncException, InterruptedException { /* Used for logging */ Runtime runtime = Runtime.getRuntime(); int mb = 1024 * 1024; LOG.info("Free Memory in Reader: " + runtime.freeMemory() / mb + " Total Memory: " + runtime.totalMemory() / mb); LOG.info("Free Memory after Reaching reader " + Runtime.getRuntime().freeMemory()); KeyValuePair<Writable, Writable> pair; long edgeCount = 0; vertexMap = Maps.newHashMap();// w w w . j a v a 2 s . c om remoteVertexMap = Maps.newHashMap(); LOG.info("SETUP Starting Free Memory: " + runtime.freeMemory() / mb + " Total Memory: " + runtime.totalMemory() / mb); LOG.info("SETUP Starting " + peer.getPeerIndex() + " Memory: " + Runtime.getRuntime().freeMemory()); int count = -1; while ((pair = peer.readNext()) != null) { count++; // NOTE: Confirm that data starts from value and not from key. String stringInput = pair.getValue().toString(); String vertexValue[] = stringInput.split("\\s+"); LongWritable vertexID = new LongWritable(Long.parseLong(vertexValue[0])); List<IEdge<E, LongWritable, LongWritable>> _adjList = new ArrayList<IEdge<E, LongWritable, LongWritable>>(); for (int j = 1; j < vertexValue.length; j++) { LongWritable sinkID = new LongWritable(Long.parseLong(vertexValue[j])); LongWritable edgeID = new LongWritable(edgeCount++ | (((long) peer.getPeerIndex()) << 32)); Edge<E, LongWritable, LongWritable> e = new Edge<E, LongWritable, LongWritable>(edgeID, sinkID); _adjList.add(e); } vertexMap.put(vertexID.get(), createVertexInstance(vertexID, _adjList)); } LOG.info("Number of Vertices: " + vertexMap.size() + " Edges: " + edgeCount); LOG.info("Free Memory: " + runtime.freeMemory() / mb + " Total Memory: " + runtime.totalMemory() / mb); System.gc(); peer.sync(); LOG.info("Creating Remote Vertex Objects"); /* Create remote vertex objects. 
*/ for (IVertex<V, E, LongWritable, LongWritable> vertex : vertexMap.values()) { for (IEdge<E, LongWritable, LongWritable> e : vertex.getOutEdges()) { LongWritable sinkID = e.getSinkVertexId(); if (!vertexMap.containsKey(sinkID.get())) { IRemoteVertex<V, E, LongWritable, LongWritable, LongWritable> sink = new RemoteVertex<>(sinkID); remoteVertexMap.put(sinkID.get(), sink); } } } peer.sync(); Partition<S, V, E, LongWritable, LongWritable, LongWritable> partition = new Partition<>( peer.getPeerIndex()); LOG.info("Calling formSubgraph()"); formSubgraphs(partition); //clearing used memory vertexMap = null; LOG.info("Done with formSubgraph()"); /* * Tell other partitions our Vertex Ids and their subgraphIDs */ Message<LongWritable, LongWritable> question = new Message<LongWritable, LongWritable>(); ControlMessage controlInfo = new ControlMessage(); controlInfo.setTransmissionType(IControlMessage.TransmissionType.BROADCAST); question.setControlInfo(controlInfo); /* * Message format being sent: subgraphID1 count1 vertex1 vertex2 ... subgraphID2 count2 vertex1 vertex2 ... 
*/ for (ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable> subgraphs : partition.getSubgraphs()) { controlInfo.addextraInfo(Longs.toByteArray(subgraphs.getSubgraphId().get())); controlInfo.addextraInfo(Longs.toByteArray(subgraphs.getLocalVertexCount())); for (IVertex<V, E, LongWritable, LongWritable> v : subgraphs.getLocalVertices()) { byte vertexIDbytes[] = Longs.toByteArray(v.getVertexId().get()); controlInfo.addextraInfo(vertexIDbytes); } } sendToAllPartitions(question); LOG.info("Completed first superstep in reader"); System.out.println("Before 2nd Superstep " + (runtime.totalMemory() - runtime.freeMemory()) / mb); peer.sync(); LOG.info("Started superstep 2 in reader"); Message<LongWritable, LongWritable> msg; Map<Integer, List<Message<LongWritable, LongWritable>>> replyMessages = new HashMap<Integer, List<Message<LongWritable, LongWritable>>>(); // Receiving 1 message per partition while ((msg = (Message<LongWritable, LongWritable>) peer.getCurrentMessage()) != null) { /* * Subgraph Partition mapping broadcast Format of received message: * partitionID subgraphID1 subgraphID2 ... */ if (msg.getMessageType() == Message.MessageType.SUBGRAPH) { Iterable<BytesWritable> subgraphList = ((ControlMessage) msg.getControlInfo()).getExtraInfo(); Integer partitionID = Ints.fromByteArray(subgraphList.iterator().next().getBytes()); for (BytesWritable subgraphListElement : Iterables.skip(subgraphList, 1)) { LongWritable subgraphID = new LongWritable(Longs.fromByteArray(subgraphListElement.getBytes())); subgraphPartitionMap.put((K) subgraphID, partitionID); } continue; } /* * receiving vertices to set Remote Vertex subgraph id. 
*/ Iterator<BytesWritable> remoteVertexQuery = ((ControlMessage) msg.getControlInfo()).getExtraInfo() .iterator(); while (remoteVertexQuery.hasNext()) { Long subgraphID = Longs.fromByteArray(remoteVertexQuery.next().getBytes()); Long vertexCount = Longs.fromByteArray(remoteVertexQuery.next().getBytes()); for (long i = 0; i < vertexCount; i++) { Long remoteVertexID = Longs.fromByteArray(remoteVertexQuery.next().getBytes()); if (remoteVertexMap.containsKey(remoteVertexID)) { RemoteVertex<V, E, LongWritable, LongWritable, LongWritable> sink = (RemoteVertex<V, E, LongWritable, LongWritable, LongWritable>) remoteVertexMap .get(remoteVertexID); sink.setSubgraphID(new LongWritable(subgraphID)); } } } } LOG.info("Completed 2nd superstep in reader"); LOG.info("Reader finished"); return partition.getSubgraphs(); }
From source file:in.dream_lab.goffish.hama.LongTextAdjacencyListReader.java
License:Apache License
@Override public List<ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable>> getSubgraphs() throws IOException, SyncException, InterruptedException { KeyValuePair<Writable, Writable> pair; long edgeCount = 0; vertexMap = Maps.newHashMap();//from w w w. j av a 2 s . c o m remoteVertexMap = Maps.newHashMap(); while ((pair = peer.readNext()) != null) { String stringInput = pair.getValue().toString(); String vertexValue[] = stringInput.split("\\s+"); LongWritable vertexID = new LongWritable(Long.parseLong(vertexValue[0])); Vertex<V, E, LongWritable, LongWritable> vertex = new Vertex<V, E, LongWritable, LongWritable>( vertexID); for (int j = 1; j < vertexValue.length; j++) { LongWritable sinkID = new LongWritable(Long.parseLong(vertexValue[j])); LongWritable edgeID = new LongWritable(edgeCount++ | (((long) peer.getPeerIndex()) << 32)); Edge<E, LongWritable, LongWritable> e = new Edge<E, LongWritable, LongWritable>(edgeID, sinkID); vertex.addEdge(e); } vertexMap.put(vertexID.get(), vertex); } /* Create remote vertex objects. */ for (IVertex<V, E, LongWritable, LongWritable> vertex : vertexMap.values()) { for (IEdge<E, LongWritable, LongWritable> e : vertex.getOutEdges()) { LongWritable sinkID = e.getSinkVertexId(); if (!vertexMap.containsKey(sinkID.get())) { IRemoteVertex<V, E, LongWritable, LongWritable, LongWritable> sink = new RemoteVertex<>(sinkID); remoteVertexMap.put(sinkID.get(), sink); } } } Partition<S, V, E, LongWritable, LongWritable, LongWritable> partition = new Partition<>( peer.getPeerIndex()); formSubgraphs(partition); /* * Ask Remote vertices to send their subgraph IDs. 
Requires 2 supersteps * because the graph is directed */ Message<LongWritable, LongWritable> question = new Message<LongWritable, LongWritable>(); ControlMessage controlInfo = new ControlMessage(); controlInfo.setTransmissionType(IControlMessage.TransmissionType.BROADCAST); question.setControlInfo(controlInfo); /* * Message format being sent: partitionID remotevertex1 remotevertex2 ... */ byte partitionIDbytes[] = Ints.toByteArray(peer.getPeerIndex()); controlInfo.addextraInfo(partitionIDbytes); for (IVertex<V, E, LongWritable, LongWritable> v : remoteVertexMap.values()) { byte vertexIDbytes[] = Longs.toByteArray(v.getVertexId().get()); controlInfo.addextraInfo(vertexIDbytes); } sendToAllPartitions(question); peer.sync(); Message<LongWritable, LongWritable> msg; Map<Integer, List<Message<LongWritable, LongWritable>>> replyMessages = new HashMap<Integer, List<Message<LongWritable, LongWritable>>>(); // Receiving 1 message per partition while ((msg = (Message<LongWritable, LongWritable>) peer.getCurrentMessage()) != null) { /* * Subgraph Partition mapping broadcast Format of received message: * partitionID subgraphID1 subgraphID2 ... */ if (msg.getMessageType() == Message.MessageType.SUBGRAPH) { Iterable<BytesWritable> subgraphList = ((ControlMessage) msg.getControlInfo()).getExtraInfo(); Integer partitionID = Ints.fromByteArray(subgraphList.iterator().next().getBytes()); for (BytesWritable subgraphListElement : Iterables.skip(subgraphList, 1)) { LongWritable subgraphID = new LongWritable(Longs.fromByteArray(subgraphListElement.getBytes())); subgraphPartitionMap.put((K) subgraphID, partitionID); } continue; } /* * receiving query to find subgraph id Remote Vertex */ Iterable<BytesWritable> RemoteVertexQuery = ((ControlMessage) msg.getControlInfo()).getExtraInfo(); /* * Reply format : sinkID1 subgraphID1 sinkID2 subgraphID2 ... 
*/ Message<LongWritable, LongWritable> subgraphIDReply = new Message<LongWritable, LongWritable>(); controlInfo = new ControlMessage(); controlInfo.setTransmissionType(IControlMessage.TransmissionType.NORMAL); subgraphIDReply.setControlInfo(controlInfo); Integer sinkPartition = Ints.fromByteArray(RemoteVertexQuery.iterator().next().getBytes()); boolean hasAVertex = false; for (BytesWritable remoteVertex : Iterables.skip(RemoteVertexQuery, 1)) { LongWritable sinkID = new LongWritable(Longs.fromByteArray(remoteVertex.getBytes())); LongWritable sinkSubgraphID = vertexSubgraphMap.get(sinkID); // In case this partition does not have the vertex /* * Case 1 : If vertex does not exist Case 2 : If vertex exists but is * remote, then its subgraphID is null */ if (sinkSubgraphID == null) { continue; } hasAVertex = true; byte sinkIDbytes[] = Longs.toByteArray(sinkID.get()); controlInfo.addextraInfo(sinkIDbytes); byte subgraphIDbytes[] = Longs.toByteArray(sinkSubgraphID.get()); controlInfo.addextraInfo(subgraphIDbytes); } if (hasAVertex) { peer.send(peer.getPeerName(sinkPartition.intValue()), (Message<K, M>) subgraphIDReply); } } peer.sync(); while ((msg = (Message<LongWritable, LongWritable>) peer.getCurrentMessage()) != null) { Iterable<BytesWritable> remoteVertexReply = ((ControlMessage) msg.getControlInfo()).getExtraInfo(); Iterator<BytesWritable> queryResponse = remoteVertexReply.iterator(); while (queryResponse.hasNext()) { LongWritable sinkID = new LongWritable(Longs.fromByteArray(queryResponse.next().getBytes())); LongWritable remoteSubgraphID = new LongWritable( Longs.fromByteArray(queryResponse.next().getBytes())); RemoteVertex<V, E, LongWritable, LongWritable, LongWritable> sink = (RemoteVertex<V, E, LongWritable, LongWritable, LongWritable>) remoteVertexMap .get(sinkID.get()); assert (sink != null); sink.setSubgraphID(remoteSubgraphID); } } return partition.getSubgraphs(); }