Example usage for org.apache.hadoop.io.BytesWritable.set

Introduction

On this page you can find example usages of org.apache.hadoop.io.BytesWritable.set.

Prototype

public void set(byte[] newData, int offset, int length)

Source Link

Document

Set the value to a copy of the given byte range

Usage

From source file:com.m6d.hive.protobuf.TestProto.java

License:Apache License

/**
 * Writes a single protobuf-encoded record to a sequence file, loads it into a Hive table that is
 * declared with the protobuf SerDe but without any SERDEPROPERTIES, and asserts that selecting
 * the key column returns the string "null".
 */
public void testNotdefined() throws Exception {
    Path seqFile = new Path(this.ROOT_DIR, "nada");
    SequenceFile.Writer writer = SequenceFile.createWriter(this.getFileSystem(), new Configuration(), seqFile,
            BytesWritable.class, BytesWritable.class);

    // The same message is serialized twice: once for the key and once for the value.
    AThing thing = AThing.newBuilder().addLuckynumbers(7).addLuckynumbers(4).addToys("car").build();
    AList message = AList.newBuilder().addAge(2).addAge(3).addThings(thing).build();

    ByteArrayOutputStream keyOut = new ByteArrayOutputStream();
    message.writeTo(keyOut);
    ByteArrayOutputStream valueOut = new ByteArrayOutputStream();
    message.writeTo(valueOut);

    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();
    byte[] keyPayload = keyOut.toByteArray();
    byte[] valuePayload = valueOut.toByteArray();
    key.set(keyPayload, 0, keyPayload.length);
    value.set(valuePayload, 0, valuePayload.length);

    writer.append(key, value);
    writer.close();

    // Make the jar that holds the input format visible to Hive.
    String jarFile = KVAsVSeqFileBinaryInputFormat.class.getProtectionDomain().getCodeSource().getLocation()
            .getFile();
    String auxJarsStatement = "set hive.aux.jars.path=file:///" + jarFile;

    System.out.println(auxJarsStatement);

    client.execute("add jar " + jarFile);
    client.execute(auxJarsStatement);

    // No SERDEPROPERTIES clause is supplied: the key/value serialize classes stay undefined.
    String createTable = "create table  nada   " + " ROW FORMAT SERDE '" + ProtobufDeserializer.class.getName()
            + "'" + " STORED AS INPUTFORMAT '" + KVAsVSeqFileBinaryInputFormat.class.getName() + "'"
            + " OUTPUTFORMAT '" + SequenceFileOutputFormat.class.getName() + "'";
    client.execute(createTable);

    client.execute("load data local inpath '" + seqFile + "' into table nada");
    client.execute("SELECT key FROM nada");

    List<String> rows = client.fetchAll();
    String expectedFirstRow = "null";
    Assert.assertEquals(expectedFirstRow, rows.get(0));

    client.execute("drop table nada");
}

From source file:com.ostor.dedup.hadoop.BinaryRecordReader.java

License:Open Source License

/**
 * Hands out the whole {@code buffer} as a single record, exactly once.
 *
 * @param key   not modified by this method
 * @param value receives a copy of {@code buffer} on the first call
 * @return {@code true} on the first call, {@code false} on every later call
 */
public boolean next(LongWritable key, BytesWritable value) {
    if (!written) {
        logger.debug("Setting value of length - " + buffer.length);
        value.set(buffer, 0, buffer.length);
        written = true;
        return true;
    }

    // The single record has already been emitted.
    return false;
}

From source file:com.rim.logdriver.mapred.avro.AvroBlockRecordReader.java

License:Apache License

/**
 * Reads the next block from the current file of the split and stores it in {@code value} as
 * {@code encodeLong(entryCount) + encodeLong(blockLength) + blockBytes}.
 *
 * @param key   set to the header of the current file
 * @param value receives the re-packed block
 * @return {@code true} if a block was read; {@code false} when the split has no more files or
 *         the sync marker following the block does not match the header's
 * @throws IOException if reading from the underlying stream fails
 */
@Override
public boolean next(AvroFileHeader key, BytesWritable value) throws IOException {
    // Advance through the files of the split until one still has unread data.
    while (pos >= end) {
        if (in != null) {
            in.close();
        }
        currentFile++;
        if (currentFile >= split.getNumPaths()) {
            return false;
        }
        initCurrentFile();
    }

    key.set(header);

    // Entry count first, then the raw block bytes.
    int entryCount = AvroUtils.readInt(in);
    byte[] blockBytes = AvroUtils.readBytes(in);

    // The block must be followed by the sync marker announced in the header.
    LOG.trace("Verifying sync marker");
    byte[] marker = AvroUtils.readBytes(in, DataFileConstants.SYNC_SIZE);
    boolean markerMatches = Arrays.equals(marker, header.getSyncMarker());
    if (!markerMatches) {
        LOG.error("Sync marker does not match");
        return false;
    }

    // Re-pack count, length and payload into one array; 10 bytes are reserved per encoded long.
    ByteBuffer packed = ByteBuffer.allocate(10 + 10 + blockBytes.length);
    packed.put(AvroUtils.encodeLong(entryCount)).put(AvroUtils.encodeLong(blockBytes.length)).put(blockBytes);
    packed.flip();
    byte[] payload = new byte[packed.remaining()];
    packed.get(payload);
    value.set(payload, 0, payload.length);

    pos = in.getPos();

    return true;
}

From source file:com.yahoo.sketches.hive.theta.ExcludeSketchUDF.java

License:Apache License

/**
 * Main logic called by hive if sketchSize is also passed in. Computes the
 * hash in first sketch excluding the hash in second sketch of two sketches of
 * same or different column./*from  w w w  .  ja  v  a 2 s  .  c  o  m*/
 * 
 * @param firstSketchBytes
 *          first sketch to be included.
 * @param secondSketchBytes
 *          second sketch to be excluded.
 * @param hashSeed
 *          Only required if input sketches were constructed using an update seed that was not the default.
 * @return resulting sketch of exclusion.
 */
public BytesWritable evaluate(final BytesWritable firstSketchBytes, final BytesWritable secondSketchBytes,
        final long hashSeed) {

    Sketch firstSketch = null;
    if (firstSketchBytes != null && firstSketchBytes.getLength() > 0) {
        firstSketch = Sketch.wrap(new NativeMemory(firstSketchBytes.getBytes()), hashSeed);
    }

    Sketch secondSketch = null;
    if (secondSketchBytes != null && secondSketchBytes.getLength() > 0) {
        secondSketch = Sketch.wrap(new NativeMemory(secondSketchBytes.getBytes()), hashSeed);
    }

    final AnotB anotb = SetOperation.builder().setSeed(hashSeed).buildANotB();
    anotb.update(firstSketch, secondSketch);
    final byte[] excludeSketchBytes = anotb.getResult().toByteArray();
    final BytesWritable result = new BytesWritable();
    result.set(excludeSketchBytes, 0, excludeSketchBytes.length);
    return result;
}

From source file:com.yahoo.sketches.hive.theta.SampleSketchUDF.java

License:Apache License

/**
 * Main logic called by hive, produces new sketch from original using
 * specified size and sampling probability.
 *
 * @param binarySketch
 *          sketch to be sampled passed in as bytes writable.
 * @param sketchSize
 *          Size to use for the new sketch.
 *          This must be a power of 2 and larger than 16. If zero, DEFAULT is used.
 * @param probability
 *          The sampling probability to use for the new sketch.
 *          Should be greater than zero and less than or equal to 1.0
 * @return The sampled sketch encoded as a BytesWritable, or null when the input is null or
 *         holds 8 or fewer valid bytes
 */
public BytesWritable evaluate(BytesWritable binarySketch, int sketchSize, float probability) {

    // Null checks
    if (binarySketch == null) {
        return null;
    }

    // getBytes() exposes the whole backing buffer, whose capacity can exceed the valid payload
    // (for example when the writable is reused), so the size guard must use getLength().
    if (binarySketch.getLength() <= 8) {
        return null;
    }

    byte[] serializedSketch = binarySketch.getBytes();

    //  The builder will catch errors with improper sketchSize or probability
    Union union = SetOperation.builder().setP(probability).buildUnion(sketchSize);

    union.update(new NativeMemory(serializedSketch)); //Union can accept Memory object directly

    Sketch intermediateSketch = union.getResult(false, null); //to CompactSketch(unordered, on-heap)
    byte[] resultSketch = intermediateSketch.toByteArray();

    BytesWritable result = new BytesWritable();
    result.set(resultSketch, 0, resultSketch.length);

    return result;
}

From source file:crunch.MaxTemperature.java

License:Apache License

/**
 * Reads the entire file behind {@code fileSplit} into {@code value} as one record.
 *
 * @param key   not modified by this method
 * @param value receives the complete file contents on the first call
 * @return {@code true} on the first call, {@code false} once the file has been processed
 * @throws IOException if the file cannot be opened or fully read
 */
@Override
public boolean next(NullWritable key, BytesWritable value) throws IOException {
    if (processed) {
        return false;
    }

    byte[] wholeFile = new byte[(int) fileSplit.getLength()];
    Path path = fileSplit.getPath();
    FileSystem fileSystem = path.getFileSystem(conf);
    FSDataInputStream stream = null;
    try {
        stream = fileSystem.open(path);
        IOUtils.readFully(stream, wholeFile, 0, wholeFile.length);
        value.set(wholeFile, 0, wholeFile.length);
    } finally {
        // closeStream is given null if open() itself failed.
        IOUtils.closeStream(stream);
    }

    processed = true;
    return true;
}

From source file:hadoop.FrameReader.java

License:Apache License

/**
 * Reads bytes from the internal buffer (refilled through {@code fillBuffer}) into a frame until
 * the complete {@code recordDelimiterBytes} sequence has been matched or the input is exhausted,
 * then keeps or skips the frame depending on the date decoded from its first bytes.
 *
 * @param frame             receives the decoded frame bytes when the frame date lies within
 *                          [startDate, endDate]
 * @param maxFrameLength    only forwarded to the recursive call; not otherwise used by the live code
 * @param maxBytesToConsume upper bound compared against {@code bytesConsumed} in the loop condition
 * @param startDate         frames whose date is lower are skipped and the next frame is read
 * @param endDate           a frame whose date is higher makes the method return 0
 * @return 0 when the frame date is after {@code endDate}; otherwise {@code bytesConsumed} cast to int
 *         (or the result of the recursive call when the frame was skipped)
 * @throws IOException if {@code bytesConsumed} exceeds {@code Integer.MAX_VALUE}
 */
public int readFrame(BytesWritable frame, int maxFrameLength, int maxBytesToConsume, double startDate,
        double endDate) throws IOException {
    /* We're reading data from inputStream, but the head of the stream may be
     *  already captured in the previous buffer, so we have several cases:
     * 
     * 1. The buffer tail does not contain any character sequence which
     *    matches with the head of delimiter. We count it as a 
     *    ambiguous byte count = 0
     *    
     * 2. The buffer tail contains a X number of characters,
     *    that forms a sequence, which matches with the
     *    head of delimiter. We count ambiguous byte count = X
     *    
     *    // ***  eg: A segment of input file is as follows
     *    
     *    " record 1792: I found this bug very interesting and
     *     I have completely read about it. record 1793: This bug
     *     can be solved easily record 1794: This ." 
     *    
     *    delimiter = "record";
     *        
     *    supposing:- String at the end of buffer =
     *    "I found this bug very interesting and I have completely re"
     *    Therefore next buffer = "ad about it. record 179       ...."           
     *     
     *     The matching characters in the input
     *     buffer tail and delimiter head = "re" 
     *     Therefore, ambiguous byte count = 2 ****   //
     *     
     *     2.1 If the following bytes are the remaining characters of
     *         the delimiter, then we have to capture only up to the starting 
     *         position of delimiter. That means, we need not include the 
     *         ambiguous characters in str.
     *     
     *     2.2 If the following bytes are not the remaining characters of
     *         the delimiter ( as mentioned in the example ), 
     *         then we have to include the ambiguous characters in str. 
     *
     * NOTE(review): the description above reads like it was written for a text line reader;
     * the live code below never re-adds the ambiguous bytes to the frame (case 2.2) - confirm
     * that this is intended.
     */
    int txtLength = 0; // only referenced by the commented-out accounting code below
    int separatorLength = 0; // never read in this method
    long bytesConsumed = 0; // NOTE(review): never incremented in the live code, only decremented below
    int delPosn = 0; // number of delimiter bytes matched so far
    int ambiguousByteCount = 0; // To capture the ambiguous characters count

    // Decoded bytes of the frame currently being read.
    List<Byte> tram = new ArrayList<Byte>();

    do {
        int startPosn = bufferPosn; //starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            // Buffer exhausted: refill it and restart at its beginning.
            startPosn = bufferPosn = 0;
            bufferLength = fillBuffer(in, buffer, ambiguousByteCount > 0);
            if (bufferLength <= 0)
                // the case of a file that ends with a zero byte is not handled
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) {
            if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) {
                // Byte continues the delimiter match; it is not added to the frame.
                delPosn++;
                if (delPosn >= recordDelimiterBytes.length) {
                    // Whole delimiter matched: step past it and stop scanning.
                    bufferPosn++;
                    break;
                }
            } else if (delPosn != 0) {// it means the previous byte was a zero
                // Decode: the current byte is read as a count and that many zero bytes are
                // appended (presumably a run-length escape for zeros - TODO confirm with the writer).
                byte nbZeros = buffer[bufferPosn];
                for (int k = 0; k < nbZeros; k++)
                    tram.add((byte) 0);

                delPosn = 0;
            } else { // no delimiter match in progress (delPosn == 0)
                // write the byte as-is
                tram.add(buffer[bufferPosn]);
            }

        }
        // Disabled byte accounting kept from an earlier version:
        //         int readLength = bufferPosn - startPosn;
        //
        //         bytesConsumed += readLength;
        //         int appendLength = readLength - delPosn;
        //         if (appendLength > maxFrameLength - txtLength) {
        //           appendLength = maxFrameLength - txtLength;
        //         }
        //         if (appendLength > 0) {
        //          
        //          frame.set(FrameReader.transformerByte(tram),0,tram.size());// do this only at the end
        //           txtLength += appendLength;
        //         }
        if (delPosn > 0 && delPosn < recordDelimiterBytes.length) {
            // Buffer ended in the middle of a possible delimiter.
            ambiguousByteCount = delPosn;
            bytesConsumed -= ambiguousByteCount; //to be consumed in next
        }
    } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume);

    // we have three cases depending on the Date of frame
    // NOTE(review): subList(0, 7) yields 7 elements while getDouble() reads 8 bytes, and it throws
    // if fewer than 7 bytes were collected (e.g. at EOF) - confirm what transformerByte returns
    // and whether the upper bound should be 8.
    bf.clear();
    bf.put(OldFrameReader.transformerByte(tram.subList(0, 7)));
    bf.flip();
    double frameDate = bf.getDouble();
    if (frameDate > endDate) { // we skip the file
        return 0;
    } else if (frameDate < startDate)// we skip the frame 
        // Recurse to read the following frame; its result is returned unchanged.
        return readFrame(frame, maxFrameLength, maxBytesToConsume, startDate, endDate);
    else { // we read the frame
        frame.set(OldFrameReader.transformerByte(tram), 0, tram.size());
    }
    if (bytesConsumed > (long) Integer.MAX_VALUE) {
        throw new IOException("Too many bytes before delimiter: " + bytesConsumed);
    }
    return (int) bytesConsumed;
}

From source file:io.bfscan.data.VByteDocVector.java

License:Apache License

/**
 * Serializes the first {@code length} term ids as a VInt count followed by one VInt per id and
 * stores the result in {@code bytes}.
 *
 * @param bytes   receives the encoded bytes (set to an empty range if encoding fails)
 * @param termids term ids to encode; {@code null} is encoded like an empty array
 * @param length  number of leading entries of {@code termids} to encode
 */
public static void toBytesWritable(BytesWritable bytes, int[] termids, int length) {
    // A missing array is written as a zero-length vector.
    final int count = (termids == null) ? 0 : length;

    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    final DataOutputStream out = new DataOutputStream(sink);
    try {
        WritableUtils.writeVInt(out, count);
        int index = 0;
        while (index < count) {
            WritableUtils.writeVInt(out, termids[index]);
            index++;
        }

        final byte[] encoded = sink.toByteArray();
        bytes.set(encoded, 0, encoded.length);
    } catch (IOException e) {
        bytes.set(new byte[0], 0, 0);
    }
}

From source file:io.covert.binary.analysis.BuildSequenceFile.java

License:Apache License

/**
 * Packs every regular file found directly inside a local directory into one record-compressed
 * sequence file, keyed by file name with the file contents as value.
 *
 * @param args {@code args[0]} is the local input directory, {@code args[1]} the output path of
 *             the sequence file
 * @return always 0
 * @throws IllegalArgumentException if {@code args[0]} cannot be listed as a directory
 * @throws Exception if reading an input file or writing the sequence file fails
 */
@Override
public int run(String[] args) throws Exception {

    File inDir = new File(args[0]);
    Path name = new Path(args[1]);

    // listFiles() returns null (not an empty array) when the path is not a listable directory;
    // fail with a clear message before creating any output.
    File[] candidates = inDir.listFiles();
    if (candidates == null) {
        throw new IllegalArgumentException("Not a readable directory: " + inDir);
    }

    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, name, Text.class, BytesWritable.class,
            CompressionType.RECORD);
    try {
        // Reused across files so each file is read in chunks instead of one byte per call.
        byte[] chunk = new byte[8192];

        for (File file : candidates) {
            if (!file.isFile()) {
                System.out.println("Skipping " + file + " (not a file) ...");
                continue;
            }

            ByteArrayOutputStream bytesOut = new ByteArrayOutputStream((int) file.length());
            FileInputStream fileIn = new FileInputStream(file);
            try {
                int read;
                while (-1 != (read = fileIn.read(chunk))) {
                    bytesOut.write(chunk, 0, read);
                }
            } finally {
                // Release the file handle even when the read fails.
                fileIn.close();
            }
            byte[] bytes = bytesOut.toByteArray();

            val.set(bytes, 0, bytes.length);
            key.set(file.getName());

            writer.append(key, val);
        }
    } finally {
        // Always close the writer so the output is flushed and the handle released.
        writer.close();
    }

    return 0;
}

From source file:io.covert.binary.analysis.BuildSequenceFileFromTarball.java

License:Apache License

/**
 * Converts a (optionally gzip- or bzip2-compressed) tarball into a record-compressed sequence
 * file named {@code <tarball name>.seq} inside {@code outputDir}. Every non-directory entry
 * becomes one record: the key is the lowercase hex MD5 of the entry contents, the value is the
 * contents.
 *
 * @param fs           file system the sequence file is written to
 * @param conf         configuration used to create the sequence file writer
 * @param inputTarball local tarball; {@code .gz}, {@code .bz} and {@code .bz2} suffixes are
 *                     recognized case-insensitively
 * @param outputDir    directory that receives the sequence file
 * @throws Exception if the tarball cannot be opened or the sequence file cannot be created;
 *                   an {@code IOException} while copying an entry only stops the conversion
 */
public void load(FileSystem fs, Configuration conf, File inputTarball, Path outputDir) throws Exception {
    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Path sequenceName = new Path(outputDir, inputTarball.getName() + ".seq");
    System.out.println("Writing to " + sequenceName);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class,
            BytesWritable.class, CompressionType.RECORD);
    try {
        // 'is' always refers to the outermost stream opened so far, so the finally block
        // releases the file handle no matter where a failure happens.
        InputStream is = new FileInputStream(inputTarball);
        try {
            // Lower-case once so all three suffix checks are case-insensitive.
            String lowerName = inputTarball.toString().toLowerCase();
            if (lowerName.endsWith(".gz")) {
                is = new GZIPInputStream(is);
            } else if (lowerName.endsWith(".bz") || lowerName.endsWith(".bz2")) {
                is.read(); // read 'B'
                is.read(); // read 'Z'
                is = new CBZip2InputStream(is);
            }

            final TarArchiveInputStream debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory()
                    .createArchiveInputStream("tar", is);
            is = debInputStream;

            // digest() resets the instance, so one MessageDigest serves every entry.
            MessageDigest md = MessageDigest.getInstance("MD5");

            TarArchiveEntry entry = null;
            while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
                if (!entry.isDirectory()) {

                    try {
                        final ByteArrayOutputStream outputFileStream = new ByteArrayOutputStream();
                        IOUtils.copy(debInputStream, outputFileStream);
                        outputFileStream.close();
                        byte[] outputFile = outputFileStream.toByteArray();
                        val.set(outputFile, 0, outputFile.length);

                        md.update(outputFile);
                        byte[] digest = md.digest();
                        StringBuilder hexdigest = new StringBuilder(digest.length * 2);
                        for (int i = 0; i < digest.length; i++) {
                            // Adding 0x100 and dropping the leading '1' zero-pads to two hex digits.
                            hexdigest.append(Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1));
                        }
                        key.set(hexdigest.toString());
                        writer.append(key, val);
                    } catch (IOException e) {
                        System.err.println("Warning: tarball may be truncated: " + inputTarball);
                        // Truncated Tarball
                        break;
                    }
                }
            }
        } finally {
            is.close();
        }
    } finally {
        writer.close();
    }
}