List of usage examples for the set method of org.apache.hadoop.io.BytesWritable
public void set(byte[] newData, int offset, int length)
From source file:com.ebay.nest.io.sede.objectinspector.primitive.WritableBinaryObjectInspector.java
License:Apache License
/**
 * Sets the content of an existing {@code BytesWritable} to the given bytes.
 *
 * @param o  the target object; it is cast to {@code BytesWritable}
 * @param bb the new content; when {@code null} the target is left untouched
 * @return the same instance that was passed in as {@code o}
 */
@Override
public BytesWritable set(Object o, byte[] bb) {
    final BytesWritable target = (BytesWritable) o;
    if (bb == null) {
        // Nothing to store: hand the target back unchanged.
        return target;
    }
    target.set(bb, 0, bb.length);
    return target;
}
From source file:com.endgame.binarypig.util.BuildSequenceFileFromArchive.java
License:Apache License
public void load(FileSystem fs, Configuration conf, File archive, Path outputDir) throws Exception { Text key = new Text(); BytesWritable val = new BytesWritable(); SequenceFile.Writer writer = null; ArchiveInputStream archiveInputStream = null; try {/*from www. j a v a 2 s .c om*/ Path sequenceName = new Path(outputDir, archive.getName() + ".seq"); System.out.println("Writing to " + sequenceName); writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class, BytesWritable.class, CompressionType.RECORD); String lowerName = archive.toString().toLowerCase(); if (lowerName.endsWith(".tar.gz") || lowerName.endsWith(".tgz")) { archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("tar", new GZIPInputStream(new FileInputStream(archive))); } else if (lowerName.endsWith(".tar.bz") || lowerName.endsWith(".tar.bz2") || lowerName.endsWith(".tbz")) { FileInputStream is = new FileInputStream(archive); is.read(); // read 'B' is.read(); // read 'Z' archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("tar", new CBZip2InputStream(is)); } else if (lowerName.endsWith(".tar")) { archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("tar", new FileInputStream(archive)); } else if (lowerName.endsWith(".zip")) { archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("zip", new FileInputStream(archive)); } else { throw new RuntimeException("Can't handle archive format for: " + archive); } ArchiveEntry entry = null; while ((entry = archiveInputStream.getNextEntry()) != null) { if (!entry.isDirectory()) { try { byte[] outputFile = IOUtils.toByteArray(archiveInputStream); val.set(outputFile, 0, outputFile.length); key.set(DigestUtils.md5Hex(outputFile)); writer.append(key, val); } catch (IOException e) { System.err.println("Warning: archive may be truncated: " + archive); // Truncated Archive break; } } } } finally { archiveInputStream.close(); writer.close(); } }
From source file:com.endgame.binarypig.util.BuildSequenceFileFromDir.java
License:Apache License
@Override public int run(String[] args) throws Exception { File inDir = new File(args[0]); Path name = new Path(args[1]); Text key = new Text(); BytesWritable val = new BytesWritable(); Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, name, Text.class, BytesWritable.class, CompressionType.RECORD);// w w w.j a v a 2 s .c o m for (File file : inDir.listFiles()) { if (!file.isFile()) { System.out.println("Skipping " + file + " (not a file) ..."); continue; } byte[] bytes = FileUtils.readFileToByteArray(file); val.set(bytes, 0, bytes.length); key.set(DigestUtils.md5Hex(bytes)); writer.append(key, val); } writer.close(); return 0; }
From source file:com.facebook.presto.hive.util.TestSerDeUtils.java
License:Apache License
/**
 * Sets a {@code BytesWritable} first to a longer and then to a shorter byte
 * sequence, serializes it as a map key, and expects the JSON key to be the
 * Base64 encoding of the second (shorter) sequence only.
 */
@Test
public void testReuse() throws Exception {
    final BytesWritable reused = new BytesWritable();

    final byte[] longer = "hello world".getBytes(UTF_8);
    reused.set(longer, 0, longer.length);

    final byte[] shorter = "bye".getBytes(UTF_8);
    reused.set(shorter, 0, shorter.length);

    final Type mapType = new TypeToken<Map<BytesWritable, Integer>>() { }.getType();
    final ObjectInspector mapInspector = getReflectionObjectInspector(mapType, ObjectInspectorOptions.JAVA);

    final byte[] json = getJsonBytes(SESSION_TIME_ZONE, ImmutableMap.of(reused, 0), mapInspector);

    final String expected = "{\"" + Base64Variants.getDefaultVariant().encode(shorter) + "\":0}";
    assertEquals(new String(json, UTF_8), expected);
}
From source file:com.gotometrics.orderly.FixedByteArrayRowKey.java
License:Apache License
/**
 * Normalizes a value to a {@code BytesWritable}.
 *
 * @param o {@code null}, a {@code BytesWritable}, or a {@code byte[]}
 * @return {@code o} itself when it is {@code null} or already a
 *         {@code BytesWritable}; otherwise a new {@code BytesWritable} set to
 *         the whole {@code byte[]}
 * @throws ClassCastException if {@code o} is none of the accepted types
 */
protected Object toBytesWritable(Object o) {
    final boolean needsWrapping = o != null && !(o instanceof BytesWritable);
    if (needsWrapping) {
        final BytesWritable wrapped = new BytesWritable();
        final byte[] raw = (byte[]) o;
        wrapped.set(raw, 0, raw.length);
        return wrapped;
    }
    // null and BytesWritable pass through untouched.
    return o;
}
From source file:com.gotometrics.orderly.FixedBytesWritableRowKey.java
License:Apache License
/**
 * Reads {@code length} bytes of a fixed-length key from {@code w}, passes them
 * through {@code maskAll} for the configured {@code order}, advances {@code w}
 * by {@code length}, and returns the result as a new {@code BytesWritable}.
 *
 * @param w buffer to read from; it is advanced by {@code length} via
 *          {@code RowKeyUtils.seek}
 * @return a new {@code BytesWritable} holding the unmasked bytes
 * @throws IOException declared by the overridden method
 */
@Override
public Object deserialize(ImmutableBytesWritable w) throws IOException {
    final int start = w.getOffset();
    final byte[] plain = maskAll(w.get(), order, start, length);

    RowKeyUtils.seek(w, length);

    final BytesWritable decoded = new BytesWritable();
    // NOTE(review): indexing `plain` at `start` assumes maskAll returns an array laid
    // out like its input (same offsets) rather than a fresh length-sized copy - confirm.
    decoded.set(plain, start, length);
    return decoded;
}
From source file:com.huayu.metis.flume.sink.hdfs.HDFSWritableSerializer.java
License:Apache License
/**
 * Wraps the body of an event in a new {@code BytesWritable}.
 *
 * @param e the event whose body is stored
 * @return a new {@code BytesWritable} set to the complete event body
 */
private BytesWritable makeByteWritable(Event e) {
    // Fetch the body once so the array and the length passed to set() are
    // guaranteed to describe the same object.
    final byte[] body = e.getBody();
    BytesWritable bytesObject = new BytesWritable();
    bytesObject.set(body, 0, body.length);
    return bytesObject;
}
From source file:com.inmobi.messaging.consumer.databus.mapred.DatabusBytesWritableRecordReader.java
License:Apache License
public boolean next(LongWritable key, BytesWritable value) throws IOException { messageValue.clear();//from ww w .ja v a 2 s . c om boolean ret = databusReader.next(key, messageValue); if (ret) { // Get the byte array corresponding to the value read. Note that ByteBuffer.array() // should only be called when we know that the underlying array exactly contains // data payload (ie. no other leading/trailing characters). Here, databusReader.next() // will set a new ByteBuffer wrapped with a new byte[] each time, hence it is safe. byte[] data = messageValue.getData().array(); value.set(data, 0, data.length); } return ret; }
From source file:com.jfolson.hive.serde.RTypedBytesWritableInput.java
License:Apache License
public BytesWritable readBytes(BytesWritable bw) throws IOException { byte[] bytes = in.readBytes(); if (bw == null) { bw = new BytesWritable(bytes); } else {// www.j a va2s . c o m bw.set(bytes, 0, bytes.length); } return bw; }
From source file:com.lucidworks.hadoop.utils.ZipFileRecordReader.java
License:Apache License
/** * Each ZipEntry is decompressed and readied for the Mapper. If the * ZipFileInputFormat has been set to Lenient (not the default), certain * exceptions will be gracefully ignored to prevent a larger job from * failing./* w ww . j a v a 2s.c om*/ */ @Override public boolean next(Text key, BytesWritable value) throws IOException { { ZipEntry entry = null; try { entry = zip.getNextEntry(); } catch (Throwable e) { if (!ZipFileInputFormat.getLenient()) { throw new RuntimeException(e); } } // Sanity check if (entry == null) { processed = true; return false; } // Filename key.set(new Text(entry.getName())); byte[] bufferOut = null; int cummulativeBytesRead = 0; while (true) { int bytesRead = 0; byte[] bufferIn = new byte[8192]; try { bytesRead = zip.read(bufferIn, 0, bufferIn.length); } catch (Throwable e) { if (!ZipFileInputFormat.getLenient()) { throw new RuntimeException(e); } return false; } if (bytesRead > 0) { byte[] tmp = head(bufferIn, bytesRead); if (cummulativeBytesRead == 0) { bufferOut = tmp; } else { bufferOut = add(bufferOut, tmp); } cummulativeBytesRead += bytesRead; } else { break; } } try { zip.closeEntry(); } catch (IOException e) { if (!ZipFileInputFormat.getLenient()) { throw new RuntimeException(e); } } // Uncompressed contents if (bufferOut != null) { value.setCapacity(bufferOut.length); value.set(bufferOut, 0, bufferOut.length); } else { log.warn("bufferOut is null for " + key);//should we return false here? I don't think so, since I think that would mean we can't process any more records } return true; } }