Example usage for org.apache.hadoop.io.BytesWritable.set

Introduction

On this page you can find usage examples for org.apache.hadoop.io.BytesWritable.set.

Prototype

public void set(byte[] newData, int offset, int length) 

Document

Set the value to a copy of the given byte range.

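Because set copies the given range into the writable's own backing buffer, the caller's array can be reused or mutated afterwards without affecting the stored value. A minimal sketch of this copy-on-set behavior (class name and strings are illustrative, not from the examples below):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetExample {
    public static void main(String[] args) {
        BytesWritable writable = new BytesWritable();

        byte[] payload = "hello".getBytes(StandardCharsets.UTF_8);
        writable.set(payload, 0, payload.length); // copies the range into the writable

        payload[0] = (byte) 'H'; // mutating the source afterwards has no effect on the copy

        // getBytes() may return a buffer larger than the valid data, so always
        // pair it with getLength() rather than relying on the array's length.
        System.out.println(new String(writable.getBytes(), 0, writable.getLength(),
                StandardCharsets.UTF_8)); // prints "hello"
    }
}
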
Usage

From source file: com.ebay.nest.io.sede.objectinspector.primitive.WritableBinaryObjectInspector.java

License: Apache License

@Override
public BytesWritable set(Object o, byte[] bb) {
    BytesWritable incoming = (BytesWritable) o;
    if (bb != null) {
        incoming.set(bb, 0, bb.length);
    }
    return incoming;
}

From source file: com.endgame.binarypig.util.BuildSequenceFileFromArchive.java

License: Apache License

public void load(FileSystem fs, Configuration conf, File archive, Path outputDir) throws Exception {
    Text key = new Text();
    BytesWritable val = new BytesWritable();

    SequenceFile.Writer writer = null;
    ArchiveInputStream archiveInputStream = null;

    try {
        Path sequenceName = new Path(outputDir, archive.getName() + ".seq");
        System.out.println("Writing to " + sequenceName);
        writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class, BytesWritable.class,
                CompressionType.RECORD);
        String lowerName = archive.toString().toLowerCase();

        if (lowerName.endsWith(".tar.gz") || lowerName.endsWith(".tgz")) {
            archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("tar",
                    new GZIPInputStream(new FileInputStream(archive)));
        } else if (lowerName.endsWith(".tar.bz") || lowerName.endsWith(".tar.bz2")
                || lowerName.endsWith(".tbz")) {
            FileInputStream is = new FileInputStream(archive);
            // CBZip2InputStream expects the two-byte "BZ" magic header to have
            // been consumed already, so skip past it before wrapping the stream.
            is.read(); // read 'B'
            is.read(); // read 'Z'
            archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("tar",
                    new CBZip2InputStream(is));
        } else if (lowerName.endsWith(".tar")) {
            archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("tar",
                    new FileInputStream(archive));
        } else if (lowerName.endsWith(".zip")) {
            archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("zip",
                    new FileInputStream(archive));
        } else {
            throw new RuntimeException("Can't handle archive format for: " + archive);
        }

        ArchiveEntry entry = null;
        while ((entry = archiveInputStream.getNextEntry()) != null) {
            if (!entry.isDirectory()) {
                try {
                    byte[] outputFile = IOUtils.toByteArray(archiveInputStream);
                    val.set(outputFile, 0, outputFile.length);
                    key.set(DigestUtils.md5Hex(outputFile));

                    writer.append(key, val);
                } catch (IOException e) {
                    System.err.println("Warning: archive may be truncated: " + archive);
                    // Truncated Archive
                    break;
                }
            }
        }
    } finally {
        // Null-check before closing: either resource may never have been
        // assigned if an earlier step threw.
        if (archiveInputStream != null) {
            archiveInputStream.close();
        }
        if (writer != null) {
            writer.close();
        }
    }
}

From source file: com.endgame.binarypig.util.BuildSequenceFileFromDir.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    File inDir = new File(args[0]);
    Path name = new Path(args[1]);

    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, name, Text.class, BytesWritable.class,
            CompressionType.RECORD);

    for (File file : inDir.listFiles()) {
        if (!file.isFile()) {
            System.out.println("Skipping " + file + " (not a file) ...");
            continue;
        }

        byte[] bytes = FileUtils.readFileToByteArray(file);
        val.set(bytes, 0, bytes.length);
        key.set(DigestUtils.md5Hex(bytes));
        writer.append(key, val);
    }
    writer.close();

    return 0;
}

From source file: com.facebook.presto.hive.util.TestSerDeUtils.java

License: Apache License

@Test
public void testReuse() throws Exception {
    BytesWritable value = new BytesWritable();

    byte[] first = "hello world".getBytes(UTF_8);
    value.set(first, 0, first.length);

    byte[] second = "bye".getBytes(UTF_8);
    value.set(second, 0, second.length);

    Type type = new TypeToken<Map<BytesWritable, Integer>>() {
    }.getType();
    ObjectInspector inspector = getReflectionObjectInspector(type, ObjectInspectorOptions.JAVA);

    byte[] bytes = getJsonBytes(SESSION_TIME_ZONE, ImmutableMap.of(value, 0), inspector);

    String encoded = Base64Variants.getDefaultVariant().encode(second);
    assertEquals(new String(bytes, UTF_8), "{\"" + encoded + "\":0}");
}

From source file: com.gotometrics.orderly.FixedByteArrayRowKey.java

License: Apache License

protected Object toBytesWritable(Object o) {
    if (o == null || o instanceof BytesWritable)
        return o;
    else {
        final BytesWritable bw = new BytesWritable();
        final byte[] bytes = (byte[]) o;
        bw.set(bytes, 0, bytes.length);
        return bw;
    }
}

From source file: com.gotometrics.orderly.FixedBytesWritableRowKey.java

License: Apache License

@Override
public Object deserialize(ImmutableBytesWritable w) throws IOException {
    int offset = w.getOffset();
    byte[] serialized = w.get();

    final byte[] unmasked = maskAll(serialized, order, offset, length);

    RowKeyUtils.seek(w, length);

    final BytesWritable result = new BytesWritable();
    result.set(unmasked, offset, length);
    return result;
}

From source file: com.huayu.metis.flume.sink.hdfs.HDFSWritableSerializer.java

License: Apache License

private BytesWritable makeByteWritable(Event e) {
    BytesWritable bytesObject = new BytesWritable();
    bytesObject.set(e.getBody(), 0, e.getBody().length);
    return bytesObject;
}

From source file: com.inmobi.messaging.consumer.databus.mapred.DatabusBytesWritableRecordReader.java

License: Apache License

public boolean next(LongWritable key, BytesWritable value) throws IOException {
    messageValue.clear();
    boolean ret = databusReader.next(key, messageValue);
    if (ret) {
        // Get the byte array corresponding to the value read. Note that ByteBuffer.array()
        // should only be called when we know that the underlying array exactly contains 
        // data payload (ie. no other leading/trailing characters). Here, databusReader.next()
        // will set a new ByteBuffer wrapped with a new byte[] each time, hence it is safe.
        byte[] data = messageValue.getData().array();
        value.set(data, 0, data.length);
    }
    return ret;
}

From source file: com.jfolson.hive.serde.RTypedBytesWritableInput.java

License: Apache License

public BytesWritable readBytes(BytesWritable bw) throws IOException {
    byte[] bytes = in.readBytes();
    if (bw == null) {
        bw = new BytesWritable(bytes);
    } else {
        bw.set(bytes, 0, bytes.length);
    }
    return bw;
}

From source file: com.lucidworks.hadoop.utils.ZipFileRecordReader.java

License: Apache License

/**
 * Each ZipEntry is decompressed and readied for the Mapper. If the
 * ZipFileInputFormat has been set to Lenient (not the default), certain
 * exceptions will be gracefully ignored to prevent a larger job from
 * failing.
 */

@Override
public boolean next(Text key, BytesWritable value) throws IOException {
    {
        ZipEntry entry = null;
        try {
            entry = zip.getNextEntry();
        } catch (Throwable e) {
            if (!ZipFileInputFormat.getLenient()) {
                throw new RuntimeException(e);
            }
        }

        // Sanity check
        if (entry == null) {
            processed = true;
            return false;
        }

        // Filename
        key.set(entry.getName());

        byte[] bufferOut = null;
        int cumulativeBytesRead = 0;
        while (true) {
            int bytesRead = 0;
            byte[] bufferIn = new byte[8192];
            try {
                bytesRead = zip.read(bufferIn, 0, bufferIn.length);
            } catch (Throwable e) {
                if (!ZipFileInputFormat.getLenient()) {
                    throw new RuntimeException(e);
                }
                return false;
            }
            if (bytesRead > 0) {
                byte[] tmp = head(bufferIn, bytesRead);
                if (cumulativeBytesRead == 0) {
                    bufferOut = tmp;
                } else {
                    bufferOut = add(bufferOut, tmp);
                }
                cumulativeBytesRead += bytesRead;
            } else {
                break;
            }
        }
        try {
            zip.closeEntry();
        } catch (IOException e) {
            if (!ZipFileInputFormat.getLenient()) {
                throw new RuntimeException(e);
            }
        }
        // Uncompressed contents
        if (bufferOut != null) {
            value.setCapacity(bufferOut.length);
            value.set(bufferOut, 0, bufferOut.length);
        } else {
            log.warn("bufferOut is null for " + key);//should we return false here?  I don't think so, since I think that would mean we can't process any more records
        }
        return true;
    }
}