List of usage examples for org.apache.hadoop.io.BytesWritable.set
public void set(byte[] newData, int offset, int length)
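Before the project examples, here is a minimal sketch of the call itself, not taken from any of the source files below (the class name and example strings are illustrative): set(byte[], int, int) copies length bytes starting at offset into the writable's own backing buffer, so a single BytesWritable instance can be reused across records, which is the pattern most of the examples rely on.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetSketch {
    public static void main(String[] args) {
        // set(byte[], int, int) copies `length` bytes starting at `offset`
        // into the writable's internal buffer; the array can be reused afterwards.
        BytesWritable value = new BytesWritable();
        byte[] first = "hello world".getBytes(StandardCharsets.UTF_8);
        value.set(first, 0, first.length);      // value now holds "hello world"
        byte[] second = "bye".getBytes(StandardCharsets.UTF_8);
        value.set(second, 0, second.length);    // same instance reused, now holds "bye"
        System.out.println(value.getLength());  // prints 3
    }
}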
From source file:io.covert.binary.analysis.BuildTarBzSequenceFile.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    File inDir = new File(args[0]);
    Path name = new Path(args[1]);
    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(name)) {
        fs.mkdirs(name);
    }

    for (File file : inDir.listFiles()) {
        Path sequenceName = new Path(name, file.getName() + ".seq");
        System.out.println("Writing to " + sequenceName);
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class,
                BytesWritable.class, CompressionType.RECORD);

        if (!file.isFile()) {
            System.out.println("Skipping " + file + " (not a file) ...");
            continue;
        }

        final InputStream is = new FileInputStream(file);
        final TarArchiveInputStream debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory()
                .createArchiveInputStream("tar", is);
        TarArchiveEntry entry = null;
        while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
            if (!entry.isDirectory()) {
                final ByteArrayOutputStream outputFileStream = new ByteArrayOutputStream();
                IOUtils.copy(debInputStream, outputFileStream);
                outputFileStream.close();
                byte[] outputFile = outputFileStream.toByteArray();

                val.set(outputFile, 0, outputFile.length);

                MessageDigest md = MessageDigest.getInstance("MD5");
                md.update(outputFile);
                byte[] digest = md.digest();
                String hexdigest = "";
                for (int i = 0; i < digest.length; i++) {
                    hexdigest += Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1);
                }
                key.set(hexdigest);
                writer.append(key, val);
            }
        }
        debInputStream.close();
        writer.close();
    }
    return 0;
}
From source file:io.github.thammegowda.Local2SeqFile.java
License:Apache License
private void writeOutput(RemoteIterator<? extends FileStatus> input) throws IOException {
    Path outPath = new Path(output);
    if (distribFs.exists(outPath)) {
        throw new IllegalArgumentException("Output file already exists, Not overwriting it:" + output);
    }

    Writer writer = SequenceFile.createWriter(distribFs.getConf(), Writer.file(outPath),
            Writer.keyClass(Text.class), Writer.valueClass(BytesWritable.class),
            Writer.compression(SequenceFile.CompressionType.RECORD));

    Text key = new Text();
    BytesWritable value = new BytesWritable();
    long skipped = 0;
    long copied = 0;
    while (input.hasNext()) {
        FileStatus next = input.next();
        if (filter(next)) {
            key.set(next.getPath().toString());
            FSDataInputStream stream = localFs.open(next.getPath());
            // CAUTION : this could cause memory overflow
            byte[] bytes = IOUtils.toByteArray(stream);
            value.set(bytes, 0, bytes.length);
            writer.append(key, value);
            copied++;
        } else {
            skipped++;
        }
    }
    writer.close();
    System.out.println("Files copied ::" + copied);
    System.out.println("Files skipped ::" + skipped);
}
From source file:io.prestosql.plugin.hive.util.TestSerDeUtils.java
License:Apache License
@Test
public void testReuse() {
    BytesWritable value = new BytesWritable();
    byte[] first = "hello world".getBytes(UTF_8);
    value.set(first, 0, first.length);
    byte[] second = "bye".getBytes(UTF_8);
    value.set(second, 0, second.length);

    Type type = new TypeToken<Map<BytesWritable, Long>>() {
    }.getType();
    ObjectInspector inspector = getInspector(type);

    Block actual = getBlockObject(mapType(createUnboundedVarcharType(), BIGINT),
            ImmutableMap.of(value, 0L), inspector);
    Block expected = mapBlockOf(createUnboundedVarcharType(), BIGINT, "bye", 0L);

    assertBlockEquals(actual, expected);
}
From source file:io.warp10.standalone.StandaloneChunkedMemoryStore.java
License:Apache License
public void dump(String path) throws IOException {

    long nano = System.nanoTime();
    int gts = 0;
    long bytes = 0L;

    Configuration conf = new Configuration();

    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();

    CompressionCodec Codec = new DefaultCodec();
    SequenceFile.Writer writer = null;
    SequenceFile.Writer.Option optPath = SequenceFile.Writer.file(new Path(path));
    SequenceFile.Writer.Option optKey = SequenceFile.Writer.keyClass(key.getClass());
    SequenceFile.Writer.Option optVal = SequenceFile.Writer.valueClass(value.getClass());
    SequenceFile.Writer.Option optCom = SequenceFile.Writer.compression(CompressionType.RECORD, Codec);

    writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom);

    TSerializer serializer = new TSerializer(new TCompactProtocol.Factory());

    try {
        for (Entry<BigInteger, InMemoryChunkSet> entry : this.series.entrySet()) {
            gts++;
            Metadata metadata = this.directoryClient.getMetadataById(entry.getKey());

            List<GTSDecoder> decoders = entry.getValue().getDecoders();

            //GTSEncoder encoder = entry.getValue().fetchEncoder(now, this.chunkcount * this.chunkspan);

            for (GTSDecoder decoder : decoders) {
                GTSWrapper wrapper = new GTSWrapper(metadata);

                wrapper.setBase(decoder.getBaseTimestamp());
                wrapper.setCount(decoder.getCount());

                byte[] data = serializer.serialize(wrapper);
                key.set(data, 0, data.length);

                ByteBuffer bb = decoder.getBuffer();

                ByteBuffer rwbb = ByteBuffer.allocate(bb.remaining());
                rwbb.put(bb);
                rwbb.rewind();
                value.set(rwbb.array(), rwbb.arrayOffset(), rwbb.remaining());

                bytes += key.getLength() + value.getLength();

                writer.append(key, value);
            }
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
        throw ioe;
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    writer.close();

    nano = System.nanoTime() - nano;

    System.out.println("Dumped " + gts + " GTS (" + bytes + " bytes) in " + (nano / 1000000.0D) + " ms.");
}
From source file:io.warp10.standalone.StandaloneMemoryStore.java
License:Apache License
public void dump(String path) throws IOException {

    long nano = System.nanoTime();
    int gts = 0;
    long bytes = 0L;

    Configuration conf = new Configuration();

    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();

    CompressionCodec Codec = new DefaultCodec();
    SequenceFile.Writer writer = null;
    SequenceFile.Writer.Option optPath = SequenceFile.Writer.file(new Path(path));
    SequenceFile.Writer.Option optKey = SequenceFile.Writer.keyClass(key.getClass());
    SequenceFile.Writer.Option optVal = SequenceFile.Writer.valueClass(value.getClass());
    SequenceFile.Writer.Option optCom = SequenceFile.Writer.compression(CompressionType.RECORD, Codec);

    writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom);

    TSerializer serializer = new TSerializer(new TCompactProtocol.Factory());

    try {
        for (Entry<BigInteger, GTSEncoder> entry : this.series.entrySet()) {
            gts++;
            Metadata metadata = this.directoryClient.getMetadataById(entry.getKey());

            GTSWrapper wrapper = new GTSWrapper(metadata);

            GTSEncoder encoder = entry.getValue();

            wrapper.setBase(encoder.getBaseTimestamp());
            wrapper.setCount(encoder.getCount());

            byte[] data = serializer.serialize(wrapper);
            key.set(data, 0, data.length);

            data = encoder.getBytes();
            value.set(data, 0, data.length);

            bytes += key.getLength() + value.getLength();

            writer.append(key, value);
        }

        /*
        for (Entry<BigInteger,Metadata> entry: this.metadatas.entrySet()) {
            gts++;
            byte[] data = serializer.serialize(entry.getValue());
            key.set(data, 0, data.length);

            GTSEncoder encoder = this.series.get(entry.getKey());

            data = encoder.getBytes();
            value.set(data, 0, data.length);

            bytes += key.getLength() + value.getLength();

            writer.append(key, value);
        }
        */
    } catch (IOException ioe) {
        ioe.printStackTrace();
        throw ioe;
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    writer.close();

    nano = System.nanoTime() - nano;

    System.out.println("Dumped " + gts + " GTS (" + bytes + " bytes) in " + (nano / 1000000.0D) + " ms.");
}
From source file:newprotobuf.mapred.ProtobufRecordReader.java
License:Open Source License
public synchronized boolean next(LongWritable key, BytesWritable value) throws IOException {
    size = 0;
    boolean readend = readLittleEndianInt(in);
    if (readend) {
        LOG.info("read the pb file completely");
        return false;
    }
    if (size < 0) {
        LOG.info("Parse the pbfile error:" + file.toUri().toString());
        LOG.info("get size " + size);
        if (skipbad) {
            LOG.info("Skip the bad file");
            reporter.incrCounter(Counter.BADFORMAT_FILE_COUNT, 1);
            return false;
        } else {
            throw (new IOException("Bad format pbfile"));
        }
    }
    pos += 2;
    if (size == 0) {
        value.set(buffer, 0, 0);
        return true;
    }
    pos += size;
    key.set(pos);
    int readlen = 0;
    if (size < buffer.length) {
        int already_read = 0;
        while (already_read < size) {
            readlen = in.read(buffer, already_read, size - already_read);
            if (readlen == -1) {
                if (already_read < size) {
                    LOG.info("Parse the pbfile error:" + file.toUri().toString());
                    LOG.info("current read size" + readlen + " but expected size:" + size);
                    if (skipbad) {
                        LOG.info("Skip the bad file");
                        reporter.incrCounter(Counter.BADFORMAT_FILE_COUNT, 1);
                        return false;
                    } else {
                        throw (new IOException("Bad format pbfile"));
                    }
                } else {
                    break;
                }
            }
            already_read += readlen;
        }
        value.set(buffer, 0, size);
    } else {
        byte[] tmp = new byte[size];
        int already_read = 0;
        while (already_read < size) {
            readlen = in.read(tmp, already_read, size - already_read);
            if (readlen == -1) {
                if (already_read < size) {
                    LOG.info("Parse the pbfile error:" + file.toUri().toString());
                    LOG.info("current read size" + readlen + " but expected size:" + size);
                    if (skipbad) {
                        LOG.info("Skip the bad file");
                        reporter.incrCounter(Counter.BADFORMAT_FILE_COUNT, 1);
                        return false;
                    } else {
                        throw (new IOException("Bad format pbfile"));
                    }
                } else {
                    break;
                }
            }
            already_read += readlen;
        }
        value.set(tmp, 0, size);
    }
    return true;
}
From source file:org.acacia.partitioner.java.WholeFileRecordReader.java
License:Apache License
@Override
public boolean next(NullWritable key, BytesWritable value) throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}
From source file:org.apache.avro.mapred.PipesCompatibleAvroRecordReader.java
License:Apache License
/**
 * If another key and value exist, updates the given key and value
 * BytesWritables and returns true. Else, returns false
 *
 * @param key the BytesWritable key to update with the next key,
 *            if it exists
 * @param value the BytesWritable value to update with the next
 *            value, if it exists
 * @return true if the next key/value pair exists, otherwise false
 * @throws IOException if the AvroRecordReader or the KeyValueGetter
 *             return an IOException
 */
public boolean next(BytesWritable key, BytesWritable value) throws IOException {
    if (avroRecordReader.next(avroWrapper, nullWritable)) {
        keyValueGetter.give(avroWrapper.datum());

        keyBytes = keyValueGetter.getKey();
        valueBytes = keyValueGetter.getValue();

        key.set(keyBytes, 0, keyBytes.length);
        value.set(valueBytes, 0, valueBytes.length);

        return true;
    }
    return false;
}
From source file:org.apache.camel.component.hdfs.HdfsConsumerTest.java
License:Apache License
@Test
public void testReadBytes() throws Exception {
    if (!canTest()) {
        return;
    }

    final Path file = new Path(new File("target/test/test-camel-bytes").getAbsolutePath());
    Configuration conf = new Configuration();
    FileSystem fs1 = FileSystem.get(file.toUri(), conf);
    SequenceFile.Writer writer = createWriter(fs1, conf, file, NullWritable.class, BytesWritable.class);
    NullWritable keyWritable = NullWritable.get();
    BytesWritable valueWritable = new BytesWritable();
    String value = "CIAO!";
    valueWritable.set(value.getBytes(), 0, value.getBytes().length);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);

    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs:///" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
                    .to("mock:result");
        }
    });
    context.start();

    resultEndpoint.assertIsSatisfied();
}
From source file:org.apache.camel.component.hdfs2.HdfsConsumerTest.java
License:Apache License
@Test
public void testReadBytes() throws Exception {
    if (!canTest()) {
        return;
    }

    final Path file = new Path(new File("target/test/test-camel-bytes").getAbsolutePath());
    Configuration conf = new Configuration();
    SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, BytesWritable.class);
    NullWritable keyWritable = NullWritable.get();
    BytesWritable valueWritable = new BytesWritable();
    String value = "CIAO!";
    valueWritable.set(value.getBytes(), 0, value.getBytes().length);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);

    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs2:///" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
                    .to("mock:result");
        }
    });
    context.start();

    resultEndpoint.assertIsSatisfied();
}