Example usage for org.apache.hadoop.io BytesWritable set

List of usage examples for org.apache.hadoop.io BytesWritable set

Introduction

On this page you can find example usage of the set method of org.apache.hadoop.io.BytesWritable.

Prototype

public void set(byte[] newData, int offset, int length) 

Document

Set the value to a copy of the given byte range
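
A minimal, self-contained sketch of that behavior (class and variable names here are illustrative only): set copies length bytes starting at offset into the writable's own buffer, so later changes to the source array do not affect the writable, and getBytes() may return a backing array longer than getLength().

import java.util.Arrays;

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetExample {
    public static void main(String[] args) {
        byte[] data = { 1, 2, 3, 4, 5, 6 };
        BytesWritable bw = new BytesWritable();

        // copy the bytes at offset 2, length 3 into the writable
        bw.set(data, 2, 3);

        // getLength() reports the valid size; getBytes() may return a larger backing array,
        // so slice to getLength() when an exact-length byte[] is needed
        byte[] exact = Arrays.copyOf(bw.getBytes(), bw.getLength());
        System.out.println(Arrays.toString(exact)); // [3, 4, 5]

        // set() made a copy, so mutating the original array leaves the writable unchanged
        data[2] = 99;
        System.out.println(bw.getBytes()[0]); // still 3
    }
}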

Usage

From source file: com.m6d.hive.protobuf.TestProto.java

License: Apache License

public void testNotdefined() throws Exception {
    Path p = new Path(this.ROOT_DIR, "nada");
    SequenceFile.Writer w = SequenceFile.createWriter(this.getFileSystem(), new Configuration(), p,
            BytesWritable.class, BytesWritable.class);

    AThing.Builder aThingBuild = AThing.newBuilder();
    AThing aThing = aThingBuild.addLuckynumbers(7).addLuckynumbers(4).addToys("car").build();
    AList.Builder aListBuild = AList.newBuilder();
    AList aList = aListBuild.addAge(2).addAge(3).addThings(aThing).build();

    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();
    ByteArrayOutputStream s = new ByteArrayOutputStream();
    aList.writeTo(s);

    ByteArrayOutputStream t = new ByteArrayOutputStream();
    aList.writeTo(t);

    key.set(s.toByteArray(), 0, s.size());
    value.set(t.toByteArray(), 0, t.size());
    w.append(key, value);
    w.close();

    String jarFile;
    jarFile = KVAsVSeqFileBinaryInputFormat.class.getProtectionDomain().getCodeSource().getLocation().getFile();

    System.out.println("set hive.aux.jars.path=file:///" + jarFile);

    client.execute("add jar " + jarFile);
    client.execute("set hive.aux.jars.path=file:///" + jarFile);

    client.execute("create table  nada   " + " ROW FORMAT SERDE '" + ProtobufDeserializer.class.getName() + "'"
    //+ " WITH SERDEPROPERTIES ('KEY_SERIALIZE_CLASS'='" + Ex.AList.class.getName()
    //+ "','VALUE_SERIALIZE_CLASS'='" + Ex.AList.class.getName() + "'   )"
            + " STORED AS INPUTFORMAT '" + KVAsVSeqFileBinaryInputFormat.class.getName() + "'"
            + " OUTPUTFORMAT '" + SequenceFileOutputFormat.class.getName() + "'");

    client.execute("load data local inpath '" + p.toString() + "' into table nada");
    client.execute("SELECT key FROM nada");

    List<String> results = client.fetchAll();
    String expected = "null";
    //expected = "{\"agecount\":2,\"agelist\":[2,3],\"thingscount\":1,\"thingslist\":[{\"luckynumberscount\":2,\"luckynumberslist\":[7,4],\"toyscount\":1,\"toyslist\":[\"car\"]}]}";
    Assert.assertEquals(expected, results.get(0));
    client.execute("drop table nada");

}

From source file: com.ostor.dedup.hadoop.BinaryRecordReader.java

License: Open Source License

public boolean next(LongWritable key, BytesWritable value) {
    if (written == true)
        return false;

    logger.debug("Setting value of length - " + buffer.length);
    value.set(buffer, 0, buffer.length);
    written = true;
    return true;
}

From source file: com.rim.logdriver.mapred.avro.AvroBlockRecordReader.java

License: Apache License

@Override
public boolean next(AvroFileHeader key, BytesWritable value) throws IOException {
    while (pos >= end) {
        if (in != null) {
            in.close();
        }
        currentFile++;
        if (split.getNumPaths() > currentFile) {
            initCurrentFile();
        } else {
            return false;
        }
    }

    key.set(header);

    // Get the number of entries in the next block
    int entries = AvroUtils.readInt(in);
    byte[] block = AvroUtils.readBytes(in);

    // Check that the sync marker is what we expect
    LOG.trace("Verifying sync marker");
    byte[] syncMarker = AvroUtils.readBytes(in, DataFileConstants.SYNC_SIZE);
    if (!Arrays.equals(syncMarker, header.getSyncMarker())) {
        LOG.error("Sync marker does not match");
        return false;
    }

    // Now, pack it all back into a byte[], and set the value of value
    {
        ByteBuffer bb = ByteBuffer.allocate(10 + 10 + block.length);
        bb.put(AvroUtils.encodeLong(entries));
        bb.put(AvroUtils.encodeLong(block.length));
        bb.put(block);
        byte[] result = new byte[bb.position()];
        bb.rewind();
        bb.get(result);
        value.set(result, 0, result.length);

        pos = in.getPos();
    }

    return true;
}

From source file: com.yahoo.sketches.hive.theta.ExcludeSketchUDF.java

License: Apache License

/**
 * Main logic called by hive if hashSeed is also passed in. Computes the
 * hashes in the first sketch excluding those in the second sketch; the two
 * sketches may be over the same or different columns.
 * 
 * @param firstSketchBytes
 *          first sketch to be included.
 * @param secondSketchBytes
 *          second sketch to be excluded.
 * @param hashSeed
 *          Only required if input sketches were constructed using an update seed that was not the default.
 * @return resulting sketch of exclusion.
 */
public BytesWritable evaluate(final BytesWritable firstSketchBytes, final BytesWritable secondSketchBytes,
        final long hashSeed) {

    Sketch firstSketch = null;
    if (firstSketchBytes != null && firstSketchBytes.getLength() > 0) {
        firstSketch = Sketch.wrap(new NativeMemory(firstSketchBytes.getBytes()), hashSeed);
    }

    Sketch secondSketch = null;
    if (secondSketchBytes != null && secondSketchBytes.getLength() > 0) {
        secondSketch = Sketch.wrap(new NativeMemory(secondSketchBytes.getBytes()), hashSeed);
    }

    final AnotB anotb = SetOperation.builder().setSeed(hashSeed).buildANotB();
    anotb.update(firstSketch, secondSketch);
    final byte[] excludeSketchBytes = anotb.getResult().toByteArray();
    final BytesWritable result = new BytesWritable();
    result.set(excludeSketchBytes, 0, excludeSketchBytes.length);
    return result;
}
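
A side note on the getBytes() calls above: BytesWritable.getBytes() returns the backing array, which can be longer than getLength(). Wrapping it directly appears safe here because the serialized sketch carries its own size, but when downstream code needs an exact-length byte[], copy the valid range first. A hedged snippet using standard Hadoop calls (firstSketchBytes as in the method above):

// exact-length copy of the valid bytes held by a BytesWritable
byte[] exact = java.util.Arrays.copyOf(firstSketchBytes.getBytes(), firstSketchBytes.getLength());

// on recent Hadoop releases the same copy is available as a convenience method
byte[] alsoExact = firstSketchBytes.copyBytes();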

From source file: com.yahoo.sketches.hive.theta.SampleSketchUDF.java

License: Apache License

/**
 * Main logic called by hive; produces a new sketch from the original using the
 * specified size and sampling probability.
 *
 * @param binarySketch
 *          sketch to be sampled passed in as bytes writable.
 * @param sketchSize 
 *          Size to use for the new sketch.
 *          This must be a power of 2 and larger than 16. If zero, DEFAULT is used.
 * @param probability
 *          The sampling probability to use for the new sketch. 
 *          Should be greater than zero and less than or equal to 1.0 
 * @return The sampled sketch encoded as a BytesWritable
 */
public BytesWritable evaluate(BytesWritable binarySketch, int sketchSize, float probability) {

    // Null checks
    if (binarySketch == null) {
        return null;
    }

    byte[] serializedSketch = binarySketch.getBytes();

    if (serializedSketch.length <= 8) {
        return null;
    }

    //  The builder will catch errors with improper sketchSize or probability
    Union union = SetOperation.builder().setP(probability).buildUnion(sketchSize);

    union.update(new NativeMemory(serializedSketch)); //Union can accept Memory object directly

    Sketch intermediateSketch = union.getResult(false, null); //to CompactSketch(unordered, on-heap)
    byte[] resultSketch = intermediateSketch.toByteArray();

    BytesWritable result = new BytesWritable();
    result.set(resultSketch, 0, resultSketch.length);

    return result;
}
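
A hypothetical test-style call to the UDF above (a hedged sketch, not from the original source; it assumes com.yahoo.sketches.theta.UpdateSketch from the same library): build a small Theta sketch, load its image into a BytesWritable with set, and sample it down.

public BytesWritable sampleDemo() {
    UpdateSketch update = UpdateSketch.builder().build();
    for (long i = 0; i < 100000; i++) {
        update.update(i); // feed some keys into the sketch
    }
    byte[] serialized = update.compact().toByteArray();

    BytesWritable input = new BytesWritable();
    input.set(serialized, 0, serialized.length); // copy the sketch image into the writable

    // sketchSize must be a power of 2 larger than 16; probability must be in (0, 1.0]
    return new SampleSketchUDF().evaluate(input, 16384, 0.5f);
}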

From source file: crunch.MaxTemperature.java

License: Apache License

@Override
    public boolean next(NullWritable key, BytesWritable value) throws IOException {
        if (!processed) {
            byte[] contents = new byte[(int) fileSplit.getLength()];
            Path file = fileSplit.getPath();
            FileSystem fs = file.getFileSystem(conf);
            FSDataInputStream in = null;
            try {
                in = fs.open(file);
                IOUtils.readFully(in, contents, 0, contents.length);
                value.set(contents, 0, contents.length);
            } finally {
                IOUtils.closeStream(in);
            }
            processed = true;
            return true;
        }
        return false;
    }
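
The reader above emits the whole file as a single record, so the matching InputFormat has to declare files non-splittable. A minimal companion sketch under the old org.apache.hadoop.mapred API used here (WholeFileInputFormat and WholeFileRecordReader are hypothetical names, not from the original source):

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class WholeFileInputFormat extends FileInputFormat<NullWritable, BytesWritable> {

    @Override
    protected boolean isSplitable(FileSystem fs, Path filename) {
        return false; // one record per file, so never split it
    }

    @Override
    public RecordReader<NullWritable, BytesWritable> getRecordReader(InputSplit split, JobConf job,
            Reporter reporter) throws IOException {
        // a record reader like the one shown above, wrapping the FileSplit
        return new WholeFileRecordReader((FileSplit) split, job);
    }
}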

From source file: hadoop.FrameReader.java

License: Apache License

/**
 * Read a line terminated by a custom delimiter.
 */
public int readFrame(BytesWritable frame, int maxFrameLength, int maxBytesToConsume, double startDate,
        double endDate) throws IOException {
    /* We're reading data from inputStream, but the head of the stream may be
     *  already captured in the previous buffer, so we have several cases:
     * 
     * 1. The buffer tail does not contain any character sequence which
     *    matches with the head of the delimiter. We count it as an
     *    ambiguous byte count = 0
     *    
     * 2. The buffer tail contains a X number of characters,
     *    that forms a sequence, which matches with the
     *    head of delimiter. We count ambiguous byte count = X
     *    
     *    // ***  eg: A segment of input file is as follows
     *    
     *    " record 1792: I found this bug very interesting and
     *     I have completely read about it. record 1793: This bug
     *     can be solved easily record 1794: This ." 
     *    
     *    delimiter = "record";
     *        
     *    supposing:- String at the end of buffer =
     *    "I found this bug very interesting and I have completely re"
     *    Therefore the next buffer = "ad about it. record 179       ...."
     *     
     *     The matching characters in the input
     *     buffer tail and delimiter head = "re" 
     *     Therefore, ambiguous byte count = 2 ****   //
     *     
     *     2.1 If the following bytes are the remaining characters of
     *         the delimiter, then we have to capture only up to the starting 
     *         position of delimiter. That means, we need not include the 
     *         ambiguous characters in str.
     *     
     *     2.2 If the following bytes are not the remaining characters of
     *         the delimiter ( as mentioned in the example ), 
     *         then we have to include the ambiguous characters in str. 
     */
    int txtLength = 0; //tracks str.getLength(), as an optimization
    int separatorLength = 0; //length of terminating newline
    long bytesConsumed = 0;
    int delPosn = 0;
    int ambiguousByteCount = 0; // To capture the ambiguous characters count

    List<Byte> tram = new ArrayList<Byte>();

    do {
        int startPosn = bufferPosn; //starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            startPosn = bufferPosn = 0;
            bufferLength = fillBuffer(in, buffer, ambiguousByteCount > 0);
            if (bufferLength <= 0)
                // we do not handle the case of a file that ends with a zero byte
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) {
            if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) {
                delPosn++;
                if (delPosn >= recordDelimiterBytes.length) {
                    bufferPosn++;
                    break;
                }
            } else if (delPosn != 0) {//it means the last byte is zero
                //decode 
                byte nbZeros = buffer[bufferPosn];
                for (int k = 0; k < nbZeros; k++)
                    tram.add((byte) 0);

                delPosn = 0;
            } else { //case that we have delPos=0
                // write the byte
                tram.add(buffer[bufferPosn]);
            }

        }
        //         int readLength = bufferPosn - startPosn;
        //
        //         bytesConsumed += readLength;
        //         int appendLength = readLength - delPosn;
        //         if (appendLength > maxFrameLength - txtLength) {
        //           appendLength = maxFrameLength - txtLength;
        //         }
        //         if (appendLength > 0) {
        //          
        //          frame.set(FrameReader.transformerByte(tram),0,tram.size()); // only do this at the end
        //           txtLength += appendLength;
        //         }
        if (delPosn > 0 && delPosn < recordDelimiterBytes.length) {
            ambiguousByteCount = delPosn;
            bytesConsumed -= ambiguousByteCount; //to be consumed in next
        }
    } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume);

    // we have three cases depending on the Date of frame
    bf.clear();
    bf.put(OldFrameReader.transformerByte(tram.subList(0, 8))); // a double needs 8 bytes; subList's end index is exclusive
    bf.flip();
    double frameDate = bf.getDouble();
    if (frameDate > endDate) { // we skip the file
        return 0;
    } else if (frameDate < startDate)// we skip the frame 
        return readFrame(frame, maxFrameLength, maxBytesToConsume, startDate, endDate);
    else { // we read the frame
        frame.set(OldFrameReader.transformerByte(tram), 0, tram.size());
    }
    if (bytesConsumed > (long) Integer.MAX_VALUE) {
        throw new IOException("Too many bytes before delimiter: " + bytesConsumed);
    }
    return (int) bytesConsumed;
}

From source file: io.bfscan.data.VByteDocVector.java

License: Apache License

public static void toBytesWritable(BytesWritable bytes, int[] termids, int length) {
    try {
        if (termids == null) {
            termids = new int[] {};
            length = 0;
        }

        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
        DataOutputStream dataOut = new DataOutputStream(bytesOut);

        WritableUtils.writeVInt(dataOut, length);
        for (int i = 0; i < length; i++) {
            WritableUtils.writeVInt(dataOut, termids[i]);
        }

        byte[] raw = bytesOut.toByteArray();
        bytes.set(raw, 0, raw.length);
    } catch (IOException e) {
        bytes.set(new byte[] {}, 0, 0);
    }
}
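
A hedged decoding counterpart (a hypothetical helper, not shown in the original class): it reads the vint-encoded length and term ids back out of a BytesWritable produced by toBytesWritable above.

public static int[] termIdsFromBytesWritable(BytesWritable bytes) {
    try {
        DataInputStream dataIn = new DataInputStream(
                new ByteArrayInputStream(bytes.getBytes(), 0, bytes.getLength()));

        int length = WritableUtils.readVInt(dataIn); // number of term ids
        int[] termids = new int[length];
        for (int i = 0; i < length; i++) {
            termids[i] = WritableUtils.readVInt(dataIn);
        }
        return termids;
    } catch (IOException e) {
        return new int[] {};
    }
}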

From source file: io.covert.binary.analysis.BuildSequenceFile.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    File inDir = new File(args[0]);
    Path name = new Path(args[1]);

    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, name, Text.class, BytesWritable.class,
            CompressionType.RECORD);

    for (File file : inDir.listFiles()) {
        if (!file.isFile()) {
            System.out.println("Skipping " + file + " (not a file) ...");
            continue;
        }

        FileInputStream fileIn = new FileInputStream(file);
        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream((int) file.length());
        int b;
        while (-1 != (b = fileIn.read())) {
            bytesOut.write(b);
        }
        fileIn.close();
        bytesOut.close();
        byte[] bytes = bytesOut.toByteArray();

        val.set(bytes, 0, bytes.length);
        key.set(file.getName());

        writer.append(key, val);
    }
    writer.close();

    return 0;
}
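
A minimal read-back sketch for the file written above (hedged; it reuses the fs, conf, and name variables and the classic SequenceFile.Reader constructor): iterate the records and slice each value to getLength() to recover the exact payload.

SequenceFile.Reader reader = new SequenceFile.Reader(fs, name, conf);
try {
    Text k = new Text();
    BytesWritable v = new BytesWritable();
    while (reader.next(k, v)) {
        // getBytes() can be padded beyond the valid size, so copy only getLength() bytes
        byte[] contents = java.util.Arrays.copyOf(v.getBytes(), v.getLength());
        System.out.println(k + " -> " + contents.length + " bytes");
    }
} finally {
    reader.close();
}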

From source file: io.covert.binary.analysis.BuildSequenceFileFromTarball.java

License: Apache License

public void load(FileSystem fs, Configuration conf, File inputTarball, Path outputDir) throws Exception {
    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Path sequenceName = new Path(outputDir, inputTarball.getName() + ".seq");
    System.out.println("Writing to " + sequenceName);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class,
            BytesWritable.class, CompressionType.RECORD);

    InputStream is = new FileInputStream(inputTarball);
    if (inputTarball.toString().toLowerCase().endsWith(".gz")) {
        is = new GZIPInputStream(is);
    } else if (inputTarball.toString().toLowerCase().endsWith(".bz")
            || inputTarball.toString().endsWith(".bz2")) {
        is.read(); // read 'B'
        is.read(); // read 'Z'
        is = new CBZip2InputStream(is);
    }

    final TarArchiveInputStream debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory()
            .createArchiveInputStream("tar", is);
    TarArchiveEntry entry = null;
    while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
        if (!entry.isDirectory()) {

            try {
                final ByteArrayOutputStream outputFileStream = new ByteArrayOutputStream();
                IOUtils.copy(debInputStream, outputFileStream);
                outputFileStream.close();
                byte[] outputFile = outputFileStream.toByteArray();
                val.set(outputFile, 0, outputFile.length);

                MessageDigest md = MessageDigest.getInstance("MD5");
                md.update(outputFile);
                byte[] digest = md.digest();
                String hexdigest = "";
                for (int i = 0; i < digest.length; i++) {
                    hexdigest += Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1);
                }
                key.set(hexdigest);
                writer.append(key, val);
            } catch (IOException e) {
                System.err.println("Warning: tarball may be truncated: " + inputTarball);
                // Truncated Tarball
                break;
            }
        }
    }
    debInputStream.close();
    writer.close();
}