Usage examples for org.apache.hadoop.io.BytesWritable.copyBytes()
public byte[] copyBytes()
From source file:LookupPostingsCompressed.java
License:Apache License
/** * Runs this tool./*from w ww . j av a 2s .com*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(LookupPostingsCompressed.class.getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX); String collectionPath = cmdline.getOptionValue(COLLECTION); if (collectionPath.endsWith(".gz")) { System.out.println("gzipped collection is not seekable: use compressed version!"); System.exit(-1); } Configuration config = new Configuration(); FileSystem fs = FileSystem.get(config); MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config); FSDataInputStream collection = fs.open(new Path(collectionPath)); BufferedReader d = new BufferedReader(new InputStreamReader(collection)); Text key = new Text(); PairOfWritables<VIntWritable, BytesWritable> value = new PairOfWritables<VIntWritable, BytesWritable>(); System.out.println("Looking up postings for the term \"starcross'd\""); key.set("starcross'd"); reader.get(key, value); BytesWritable postings = value.getRightElement(); ByteArrayInputStream buffer = new ByteArrayInputStream(postings.copyBytes()); DataInputStream in = new DataInputStream(buffer); int OFFSET = 0; int count; while 
(in.available() != 0) { OFFSET = OFFSET + WritableUtils.readVInt(in); count = WritableUtils.readVInt(in); System.out.print("(" + OFFSET + ", " + count + ")"); collection.seek(OFFSET); System.out.println(d.readLine()); } OFFSET = 0; key.set("gold"); reader.get(key, value); postings = value.getRightElement(); buffer = new ByteArrayInputStream(postings.copyBytes()); in = new DataInputStream(buffer); System.out.println("Complete postings list for 'gold': (" + value.getLeftElement() + ", ["); while (in.available() != 0) { OFFSET = OFFSET + WritableUtils.readVInt(in); count = WritableUtils.readVInt(in); System.out.print("(" + OFFSET + ", " + count + ")"); //collection.seek(OFFSET); //System.out.println(d.readLine()); System.out.print(", "); } System.out.print("])\n"); Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry(); buffer.reset(); OFFSET = 0; while (in.available() != 0) { OFFSET = OFFSET + WritableUtils.readVInt(in); count = WritableUtils.readVInt(in); goldHist.increment(count); } System.out.println("histogram of tf values for gold"); for (PairOfInts pair : goldHist) { System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement()); } buffer.close(); //Silver key.set("silver"); reader.get(key, value); postings = value.getRightElement(); buffer = new ByteArrayInputStream(postings.copyBytes()); in = new DataInputStream(buffer); System.out.println("Complete postings list for 'silver': (" + value.getLeftElement() + ", ["); while (in.available() != 0) { OFFSET = OFFSET + WritableUtils.readVInt(in); count = WritableUtils.readVInt(in); System.out.print("(" + OFFSET + ", " + count + ")"); //collection.seek(OFFSET); //System.out.println(d.readLine()); System.out.print(", "); } System.out.print("])\n"); Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry(); buffer.reset(); OFFSET = 0; while (in.available() != 0) { OFFSET = OFFSET + WritableUtils.readVInt(in); count = WritableUtils.readVInt(in); 
silverHist.increment(count); } System.out.println("histogram of tf values for silver"); for (PairOfInts pair : goldHist) { System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement()); } buffer.close(); key.set("bronze"); Writable w = reader.get(key, value); if (w == null) { System.out.println("the term bronze does not appear in the collection"); } collection.close(); reader.close(); return 0; }
From source file:bigsatgps.BigDataHandler.java
License:Open Source License
/**
 * Reads the first record of a sequence file, decodes its value as an image and writes that
 * image to a local TIFF file.
 *
 * <p>The output path is the input path up to its first '.' (or the whole input path when it
 * contains no '.'), followed by "copy.tif".
 *
 * @param inpath path of the sequence file to read
 * @throws IOException if the file cannot be read, holds no record, does not contain a
 *         decodable image, or the TIFF cannot be written
 */
public void SequenceToImage(String inpath) throws IOException {
    FileSystem fs = FileSystem.get(confHadoop);
    Path inputFilePath = new Path(inpath);

    byte[] imagebytearray;
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inputFilePath, confHadoop);
    try {
        Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), confHadoop);
        BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), confHadoop);
        if (!reader.next(key, value)) {
            throw new IOException("sequence file contains no record: " + inpath);
        }
        imagebytearray = value.copyBytes();
    } finally {
        // The reader holds an open file-system stream; release it even on failure.
        reader.close();
    }

    // ImageIO.read returns null (rather than throwing) when no reader understands the data.
    BufferedImage bufferedseq = ImageIO.read(new ByteArrayInputStream(imagebytearray));
    if (bufferedseq == null) {
        throw new IOException("record in " + inpath + " is not a decodable image");
    }

    int firstDot = inpath.indexOf(".");
    String stem = firstDot >= 0 ? inpath.substring(0, firstDot) : inpath;
    String outpath = stem + "copy.tif";

    // ImageIO.write returns false (rather than throwing) when no writer for the format exists.
    if (!ImageIO.write(bufferedseq, "tiff", new File(outpath))) {
        throw new IOException("no ImageIO writer available for format tiff; nothing written to " + outpath);
    }
    System.out.println("image was successfully retrieved and written to " + outpath);
}
From source file:cienciaCelularMR.McellMapper.java
@Override public void map(KeyMcell key, BytesWritable value, Context output) throws IOException, InterruptedException { try {// w w w.jav a 2 s. c om System.out.println("Entro al Map"); System.out.println("Key del mcell mapper: " + key); byte[] arrayByte = value.copyBytes(); File archivo = new File("entradaMap.mdl"); try (FileOutputStream fos = new FileOutputStream(archivo)) { fos.write(arrayByte); fos.flush(); } Process process = new ProcessBuilder("mcell.exe", "-errfile", "errorMcell.txt", "entradaMap.mdl") .start(); InputStream is = process.getInputStream(); InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr); String line; Matcher matcher; System.out.println("Mcell is running"); String res = ""; while ((line = br.readLine()) != null) { res = res.concat(line); res = res.concat("\n"); output.progress(); try { matcher = pattern.matcher(line); if (matcher.find()) { int fieldCount; Text[] fields; fieldCount = matcher.groupCount(); fields = new Text[fieldCount]; for (int i = 0; i < fieldCount; i++) { fields[i] = new Text(matcher.group(i + 1)); } System.out.println("Progreso: " + Integer.parseInt(fields[0].toString()) + " de " + Integer.parseInt(fields[1].toString())); } } catch (Exception ex) { } } File errorFile = new File("errorMcell.txt"); if (errorFile.exists()) { InputStream in = new FileInputStream(errorFile); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); String l; while ((l = reader.readLine()) != null) { res = res.concat(l); res = res.concat("\n"); } } mos.write("controloutput", key, new Text(res)); //free memory res = ""; System.out.println("Leyendo salida de MCell..."); String salidaName = "salidaMCell-" + key.getIdUsuario() + "." 
+ key.getSubIdUsuario() + ".dat"; FSDataOutputStream fs = FileSystem.get(output.getConfiguration()).create(new Path(salidaName)); File salidaFile = new File("joined_1.dat"); if (salidaFile.exists()) { FileInputStream ios = new FileInputStream(salidaFile); byte[] buf = new byte[1024]; int totalbytes = 0; int bytesRead; while ((bytesRead = ios.read(buf)) > 0) { totalbytes += bytesRead; fs.write(buf, 0, bytesRead); fs.flush(); output.progress(); } fs.close(); ios.close(); System.out.println("***Mcell termino de leer y guardar archivo .dat, tamao: " + totalbytes); System.out.println("Nombre que se le pasa a Fernet: " + salidaName); output.write(key, new Text(salidaName)); } else { errorFile = new File("errorMcell.txt"); if (errorFile.exists()) { InputStream in = new FileInputStream(errorFile); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); String l; while ((l = reader.readLine()) != null) { res = res.concat(l); res = res.concat("\n"); } if (!"".equals(res)) { mos.write("errormcell", key, new Text(res)); } } } } catch (IOException | IllegalArgumentException | InterruptedException ex) { String salidaName = "errorMapper-" + key.getIdUsuario() + "." + key.getSubIdUsuario() + ".txt"; FSDataOutputStream fs = FileSystem.get(output.getConfiguration()).create(new Path(salidaName)); fs.write(new Byte("Error en Mapper MCell:")); fs.write(new Byte("\n")); fs.flush(); fs.close(); Logger.getLogger(McellMapper.class.getName()).log(Level.SEVERE, null, ex); throw new InterruptedException(ex.getMessage()); } }
From source file:com.cloudera.impala.TestUdf.java
License:Apache License
/**
 * Returns a new BytesWritable holding a copy of the argument's bytes.
 *
 * @param a the value to copy; may be null
 * @return a new instance built from {@code a.copyBytes()}, or null when {@code a} is null
 */
public BytesWritable evaluate(BytesWritable a) {
    return (a == null) ? null : new BytesWritable(a.copyBytes());
}
From source file:com.xiaoxiaomo.mr.MapperTest.java
License:Apache License
@Test public void testCanMapEmptyPayload() throws IOException { KafkaInputSplit split = new KafkaInputSplit(1, "host-01", "topic1", 3, 1234567890L); MsgMetadataWritable inputKey = new MsgMetadataWritable(split, split.getStartOffset()); BytesWritable inputValue = new BytesWritable(new byte[0]); mapDriver.withInput(inputKey, inputValue); final List<Pair<MsgMetadataWritable, BytesWritable>> result = mapDriver.run(); BytesWritable value = result.get(0).getSecond(); assertNotNull(value);//from w w w . ja v a2 s . c o m assertEquals(0, value.copyBytes().length); }
From source file:eagle.query.aggregate.raw.RawGroupbyBucket.java
License:Apache License
/**
 * Expensive operation: allocates new collections and formats the result on every call.
 *
 * @return for every entry of the group-to-function map, the key's fields converted to
 *         strings, mapped to the results of that entry's functions
 */
public Map<List<String>, List<Double>> result() {
    Map<List<String>, List<Double>> formatted = new HashMap<List<String>, List<Double>>();
    for (Map.Entry<GroupbyKey, List<Function>> group : this.group2FunctionMap.entrySet()) {
        // Evaluate every function of the group first ...
        List<Double> functionResults = new ArrayList<Double>();
        for (Function function : group.getValue()) {
            functionResults.add(function.result());
        }

        // ... then turn the raw key fields into strings.
        List<String> keyFields = new ArrayList<String>();
        for (BytesWritable rawField : group.getKey().getValue()) {
            keyFields.add(new String(rawField.copyBytes()));
        }

        formatted.put(keyFields, functionResults);
    }
    return formatted;
}
From source file:eagle.storage.hbase.aggregate.coprocessor.TestGroupAggregateClient.java
License:Apache License
/**
 * Logs every group-by key and its values at DEBUG level.
 *
 * @param keyValues the key/value pairs to log
 */
private void logGroupbyKeyValue(List<GroupbyKeyValue> keyValues) {
    // The lists built below are only ever used in the debug message, so skip the copying
    // and string decoding entirely when DEBUG is off.
    if (!LOG.isDebugEnabled()) {
        return;
    }
    for (GroupbyKeyValue keyValue : keyValues) {
        GroupbyKey key = keyValue.getKey();
        List<String> keys = new ArrayList<String>();
        for (BytesWritable bytes : key.getValue()) {
            keys.add(new String(bytes.copyBytes()));
        }
        List<Double> vals = new ArrayList<Double>();
        GroupbyValue val = keyValue.getValue();
        for (DoubleWritable dw : val.getValue()) {
            vals.add(dw.get());
        }
        LOG.debug("KEY: " + keys + ", VALUE: " + vals);
    }
}
From source file:in.dream_lab.goffish.hama.FullInfoNonSplitReader.java
License:Apache License
@Override public List<ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable>> getSubgraphs() throws IOException, SyncException, InterruptedException { long startTime = System.currentTimeMillis(); KeyValuePair<Writable, Writable> pair; while ((pair = peer.readNext()) != null) { String stringInput = pair.getValue().toString(); createVertex(stringInput);//w w w. j a v a 2 s . c om } LOG.debug("Finished Graph Loading in partition" + peer.getPeerIndex()); // broadcast all subgraphs belonging to this partition Message<K, M> subgraphMapppingMessage = new Message<>(); subgraphMapppingMessage.setMessageType(Message.MessageType.CUSTOM_MESSAGE); ControlMessage controlInfo = new ControlMessage(); controlInfo.setTransmissionType(IControlMessage.TransmissionType.BROADCAST); controlInfo.setPartitionID(peer.getPeerIndex()); subgraphMapppingMessage.setControlInfo(controlInfo); for (ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable> subgraph : partition.getSubgraphs()) { byte subgraphIDbytes[] = Longs.toByteArray(subgraph.getSubgraphId().get()); controlInfo.addextraInfo(subgraphIDbytes); } sendToAllPartitions(subgraphMapppingMessage); LOG.debug("Subgraph partition Broadcast sent"); long endTime = System.currentTimeMillis(); LOG.info("GOFFISH3.PERF.GRAPH_LOAD," + peer.getPeerIndex() + "," + peer.getSuperstepCount() + "," + startTime + "," + endTime + "," + (endTime - startTime)); peer.sync(); startTime = System.currentTimeMillis(); Message<K, M> subgraphMappingInfoMessage; while ((subgraphMappingInfoMessage = peer.getCurrentMessage()) != null) { ControlMessage receivedCtrl = (ControlMessage) subgraphMappingInfoMessage.getControlInfo(); Integer partitionID = receivedCtrl.getPartitionID(); for (BytesWritable rawSubgraphID : receivedCtrl.getExtraInfo()) { LongWritable subgraphID = new LongWritable(Longs.fromByteArray(rawSubgraphID.copyBytes())); subgraphPartitionMap.put((K) subgraphID, partitionID); } } LOG.debug("Reader Completed"); endTime = System.currentTimeMillis(); 
LOG.info("GOFFISH3.PERF.GRAPH_LOAD," + peer.getPeerIndex() + "," + peer.getSuperstepCount() + "," + startTime + "," + endTime + "," + (endTime - startTime)); return partition.getSubgraphs(); }
From source file:in.dream_lab.goffish.hama.FullInfoSplitReader.java
License:Apache License
@Override public List<ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable>> getSubgraphs() throws IOException, SyncException, InterruptedException { KeyValuePair<Writable, Writable> pair; while ((pair = peer.readNext()) != null) { String stringInput = pair.getValue().toString(); // pid is the first column and its range is 0 to max pid int partitionID = Integer.parseInt(stringInput.substring(0, stringInput.indexOf('\t'))); LOG.debug("partitionID = " + partitionID); if (partitionID != peer.getPeerIndex()) { // send vertex to its correct partition Message<K, M> msg = new Message<>(); msg.setMessageType(Message.MessageType.VERTEX); ControlMessage ctrl = new ControlMessage(); ctrl.setTransmissionType(IControlMessage.TransmissionType.VERTEX); ctrl.addextraInfo(stringInput.getBytes()); msg.setControlInfo(ctrl);// www . j a va2 s.co m peer.send(peer.getPeerName(partitionID), msg); } else { // belongs to this partition createVertex(stringInput); } } peer.sync(); Message<K, M> msg; //recieve all incoming vertices while ((msg = peer.getCurrentMessage()) != null) { ControlMessage receivedCtrl = (ControlMessage) msg.getControlInfo(); createVertex(new String(receivedCtrl.getExtraInfo().iterator().next().copyBytes())); } // broadcast all subgraphs belonging to this partition Message<K, M> subgraphMapppingMessage = new Message<>(); subgraphMapppingMessage.setMessageType(Message.MessageType.CUSTOM_MESSAGE); ControlMessage controlInfo = new ControlMessage(); controlInfo.setTransmissionType(IControlMessage.TransmissionType.BROADCAST); controlInfo.setPartitionID(peer.getPeerIndex()); subgraphMapppingMessage.setControlInfo(controlInfo); for (ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable> subgraph : partition.getSubgraphs()) { byte subgraphIDbytes[] = Longs.toByteArray(subgraph.getSubgraphId().get()); controlInfo.addextraInfo(subgraphIDbytes); } sendToAllPartitions(subgraphMapppingMessage); peer.sync(); Message<K, M> subgraphMappingInfoMessage; while 
((subgraphMappingInfoMessage = peer.getCurrentMessage()) != null) { ControlMessage receivedCtrl = (ControlMessage) subgraphMappingInfoMessage.getControlInfo(); Integer partitionID = receivedCtrl.getPartitionID(); for (BytesWritable rawSubgraphID : receivedCtrl.getExtraInfo()) { LongWritable subgraphID = new LongWritable(Longs.fromByteArray(rawSubgraphID.copyBytes())); subgraphPartitionMap.put((K) subgraphID, partitionID); } } return partition.getSubgraphs(); }
From source file:in.dream_lab.goffish.hama.FullInfoSplitReaderInt.java
License:Apache License
@Override public List<ISubgraph<S, V, E, LongWritable, IntWritable, LongWritable>> getSubgraphs() throws IOException, SyncException, InterruptedException { KeyValuePair<Writable, Writable> pair; while ((pair = peer.readNext()) != null) { String stringInput = pair.getValue().toString(); // pid is the first column and its range is 0 to max pid int partitionID = Integer.parseInt(stringInput.substring(0, stringInput.indexOf('\t'))); LOG.debug("partitionID = " + partitionID); if (partitionID != peer.getPeerIndex()) { // send vertex to its correct partition Message<K, M> msg = new Message<>(); msg.setMessageType(Message.MessageType.VERTEX); ControlMessage ctrl = new ControlMessage(); ctrl.setTransmissionType(IControlMessage.TransmissionType.VERTEX); ctrl.addextraInfo(stringInput.getBytes()); msg.setControlInfo(ctrl);//w w w .j av a 2 s. co m peer.send(peer.getPeerName(partitionID), msg); } else { // belongs to this partition createVertex(stringInput); } } peer.sync(); Message<K, M> msg; //recieve all incoming vertices while ((msg = peer.getCurrentMessage()) != null) { ControlMessage receivedCtrl = (ControlMessage) msg.getControlInfo(); createVertex(new String(receivedCtrl.getExtraInfo().iterator().next().copyBytes())); } // broadcast all subgraphs belonging to this partition Message<K, M> subgraphMapppingMessage = new Message<>(); subgraphMapppingMessage.setMessageType(Message.MessageType.CUSTOM_MESSAGE); ControlMessage controlInfo = new ControlMessage(); controlInfo.setTransmissionType(IControlMessage.TransmissionType.BROADCAST); controlInfo.setPartitionID(peer.getPeerIndex()); subgraphMapppingMessage.setControlInfo(controlInfo); for (ISubgraph<S, V, E, LongWritable, IntWritable, LongWritable> subgraph : partition.getSubgraphs()) { byte subgraphIDbytes[] = Longs.toByteArray(subgraph.getSubgraphId().get()); controlInfo.addextraInfo(subgraphIDbytes); } sendToAllPartitions(subgraphMapppingMessage); peer.sync(); Message<K, M> subgraphMappingInfoMessage; while 
((subgraphMappingInfoMessage = peer.getCurrentMessage()) != null) { ControlMessage receivedCtrl = (ControlMessage) subgraphMappingInfoMessage.getControlInfo(); Integer partitionID = receivedCtrl.getPartitionID(); for (BytesWritable rawSubgraphID : receivedCtrl.getExtraInfo()) { LongWritable subgraphID = new LongWritable(Longs.fromByteArray(rawSubgraphID.copyBytes())); subgraphPartitionMap.put((K) subgraphID, partitionID); } } return partition.getSubgraphs(); }