Example usage for org.apache.hadoop.io Text getLength

List of usage examples for org.apache.hadoop.io Text getLength

Introduction

This page collects example usages of the getLength() method of org.apache.hadoop.io.Text.

Prototype

@Override
public int getLength() 

Source Link

Document

Returns the number of valid bytes in the byte array returned by getBytes().

Usage

From source file:mvm.rya.accumulo.pig.AccumuloStorage.java

License:Apache License

/**
 * Writes one tuple to the configured table as a single-cell mutation.
 *
 * <p>Tuple layout as read by this method: field 0 is the row, field 1 the
 * column family and field 2 the column qualifier. When the tuple has more
 * than four fields, field 3 is the column visibility and field 4 the value;
 * otherwise field 3 is the value. An empty visibility is treated the same as
 * no visibility.
 *
 * @param t the tuple to store
 * @throws ExecException declared by this method; presumably raised by tuple
 *         field access or conversion (confirm against the helpers)
 * @throws IOException if the write is interrupted (the
 *         {@link InterruptedException} is wrapped as the cause)
 */
public void putNext(Tuple t) throws ExecException, IOException {
    final Mutation mutation = new Mutation(objToText(t.get(0)));
    final Text family = objToText(t.get(1));
    final Text qualifier = objToText(t.get(2));

    final boolean hasVisibilityField = t.size() > 4;
    if (!hasVisibilityField) {
        // Short tuple: the fourth field is the cell value.
        mutation.put(family, qualifier, new Value(objToBytes(t.get(3))));
    } else {
        final Text visibility = objToText(t.get(3));
        final Value cellValue = new Value(objToBytes(t.get(4)));
        if (visibility.getLength() > 0) {
            mutation.put(family, qualifier, new ColumnVisibility(visibility), cellValue);
        } else {
            mutation.put(family, qualifier, cellValue);
        }
    }

    try {
        writer.write(tableName, mutation);
    } catch (InterruptedException interrupted) {
        throw new IOException(interrupted);
    }
}

From source file:mvm.rya.indexing.accumulo.freetext.ColumnPrefixes.java

License:Apache License

/**
 * Returns a new {@code Text} holding the content of {@code termWithPrefix}
 * without its first two bytes.
 *
 * <p>NOTE(review): the two skipped bytes are presumably the column prefix
 * used by this class; confirm against the prefix constants.
 *
 * @param termWithPrefix the term including its two-byte prefix
 * @return a fresh {@code Text} with the remaining bytes
 */
public static Text removePrefix(Text termWithPrefix) {
    final int prefixLength = 2;
    final Text stripped = new Text();
    // Only the first getLength() bytes of the backing array are valid content.
    stripped.set(termWithPrefix.getBytes(), prefixLength, termWithPrefix.getLength() - prefixLength);
    return stripped;
}

From source file:net.mooncloud.hadoop.hive.ql.udf.UDFMd5.java

License:Apache License

/**
 * Convert String to md5/*from  w ww  .  j  a  v a  2  s. c  om*/
 */
public Text evaluate(Text n) {
    if (n == null) {
        return null;
    }

    digest.reset();
    digest.update(n.getBytes(), 0, n.getLength());
    byte[] md5Bytes = digest.digest();
    String md5Hex = Hex.encodeHexString(md5Bytes);

    result.set(md5Hex);
    return result;
}

From source file:net.mooncloud.hadoop.hive.ql.udf.UDFRSASign.java

License:Apache License

/**
 * Signs the content of {@code n} using a Base64-encoded private key.
 *
 * <p>Both inputs are copied to exactly {@code getLength()} bytes before use:
 * {@code Text.getBytes()} returns the backing array, which may be longer than
 * the valid content, so passing it directly could sign stale trailing bytes.
 *
 * @param n          the data to sign; may be {@code null}
 * @param privateKey the Base64-encoded private key; may be {@code null}
 * @return the signature, or {@code null} when either argument is {@code null}
 *         or when decoding/signing fails (the failure is printed to stderr)
 */
public BytesWritable evaluate(Text n, Text privateKey) {
    if (n == null || privateKey == null) {
        return null;
    }

    try {
        byte[] keyBytes = new byte[privateKey.getLength()];
        System.arraycopy(privateKey.getBytes(), 0, keyBytes, 0, keyBytes.length);
        byte[] decodedKey = Base64.decodeBase64(keyBytes);

        // Trim the payload to its valid length as well, not just the key.
        byte[] data = new byte[n.getLength()];
        System.arraycopy(n.getBytes(), 0, data, 0, data.length);

        result = new BytesWritable(RSAUtils.sign(data, decodedKey));
        return result;
    } catch (Exception e) {
        e.printStackTrace();
        // Do not hand back the result of an earlier call for a row that failed.
        return null;
    }
}

From source file:net.mooncloud.hadoop.hive.ql.udf.UDFRSAVerify.java

License:Apache License

/**
 * Verifies a Base64-encoded signature over the content of {@code n} using a
 * Base64-encoded public key.
 *
 * <p>All three inputs are copied to exactly {@code getLength()} bytes before
 * use: {@code Text.getBytes()} returns the backing array, which may be longer
 * than the valid content, so passing it directly could verify against stale
 * trailing bytes.
 *
 * @param n         the signed data; may be {@code null}
 * @param sign      the Base64-encoded signature; may be {@code null}
 * @param publicKey the Base64-encoded public key; may be {@code null}
 * @return the verification outcome, or {@code null} when any argument is
 *         {@code null} or when decoding/verification fails (the failure is
 *         printed to stderr)
 */
public BooleanWritable evaluate(Text n, Text sign, Text publicKey) {
    if (n == null || sign == null || publicKey == null) {
        return null;
    }

    try {
        byte[] publicKeyBytes = new byte[publicKey.getLength()];
        System.arraycopy(publicKey.getBytes(), 0, publicKeyBytes, 0, publicKeyBytes.length);
        byte[] publicKeyDecoded = Base64.decodeBase64(publicKeyBytes);

        byte[] signBytes = new byte[sign.getLength()];
        System.arraycopy(sign.getBytes(), 0, signBytes, 0, signBytes.length);
        byte[] signDecoded = Base64.decodeBase64(signBytes);

        // Trim the payload to its valid length as well, not just key and signature.
        byte[] data = new byte[n.getLength()];
        System.arraycopy(n.getBytes(), 0, data, 0, data.length);

        result = new BooleanWritable(RSAUtils.verify(data, publicKeyDecoded, signDecoded));
        return result;
    } catch (Exception e) {
        e.printStackTrace();
        // Do not hand back the result of an earlier call for a row that failed.
        return null;
    }
}

From source file:net.mooncloud.hadoop.hive.ql.udf.UDFUnbase64.java

License:Apache License

/**
 * Base64-decodes the valid bytes of the given text.
 *
 * <p>The decoded bytes are stored in the shared {@code result} field, which
 * is then returned.
 *
 * @param value the Base64-encoded text; may be {@code null}
 * @return the decoded bytes, or {@code null} when {@code value} is {@code null}
 */
public BytesWritable evaluate(Text value) {
    if (value == null) {
        return null;
    }

    // Copy only the valid portion of the backing array before decoding.
    final int encodedLength = value.getLength();
    final byte[] encoded = new byte[encodedLength];
    System.arraycopy(value.getBytes(), 0, encoded, 0, encodedLength);

    final byte[] decoded = Base64.decodeBase64(encoded);
    result.set(decoded, 0, decoded.length);
    return result;
}

From source file:nl.basjes.hadoop.io.compress.TestSplittableCodecSeams.java

License:Apache License

/**
 * Checks that reading the file as a sequence of splits yields exactly the
 * same lines as reading the file as a single split.
 *
 * <p>The whole file is opened once as the reference. The file is then read
 * split by split, and every line from a split is compared with the next line
 * of the reference. After the last split the reference must be exhausted too.
 *
 * @param conf               configuration; {@code io.file.buffer.size} is
 *                           overwritten with {@code BUFFER_SIZE}
 * @param fs                 file system holding the test file
 * @param filename           the compressed test file
 * @param codecClass         the splittable codec under test
 * @param splitSize          size in bytes of each split
 * @param recordsInFile      used only to restrict per-line debug logging to
 *                           the last ten records
 * @param lastSplitSizeLimit when the next split end plus this value would pass
 *                           the end of the file, the next split is extended to
 *                           the end of the file instead
 * @throws IOException if reading the file fails
 */
private void validateSplitSeams(final Configuration conf, final FileSystem fs, final Path filename,
        final Class<? extends SplittableCompressionCodec> codecClass, final long splitSize,
        final long recordsInFile, final long lastSplitSizeLimit) throws IOException {
    // To make the test predictable
    conf.setInt("io.file.buffer.size", BUFFER_SIZE);

    final FileStatus infile = fs.getFileStatus(filename);
    final long inputLength = infile.getLen();

    if (inputLength > Integer.MAX_VALUE) {
        fail("Bad test file length.");
    }

    LOG.info("Input is " + inputLength + " bytes. " + "making a split every " + splitSize + " bytes.");

    if (inputLength <= splitSize) {
        fail("The compressed test file is too small to do any useful testing.");
    }

    final SplittableCompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);

    /*
     * The validation is done as follows:
     * 1) We open the entire file as a single split as the reference
     * 2) We create a sequence of splits and validate each line with the
     *    reference split.
     * The lines from these two must match 100%.
     */

    final Text refLine = new Text();
    final Decompressor refDcmp = CodecPool.getDecompressor(codec);
    assertNotNull("Unable to load the decompressor for codec \"" + codec.getClass().getName() + "\"", refDcmp);

    final SplitCompressionInputStream refStream = codec.createInputStream(fs.open(infile.getPath()), refDcmp, 0,
            inputLength, SplittableCompressionCodec.READ_MODE.BYBLOCK);
    final LineReader refReader = new LineReader(refStream, conf);

    final Text line = new Text();
    final Decompressor dcmp = CodecPool.getDecompressor(codec);
    // Check the decompressor that was just obtained for the splits, not the reference one again.
    assertNotNull("Unable to load the decompressor for codec \"" + codec.getClass().getName() + "\"", dcmp);

    try {
        long start = 0;
        long end = splitSize;
        int splitCount = 0;
        long refLineNumber = 0;
        long splitLineNumber;

        while (end <= inputLength) {
            splitLineNumber = 0;
            ++splitCount;
            LOG.debug("-------------------------------------------------------");
            dcmp.reset(); // Reset the Decompressor for reuse with the new stream

            final SplitCompressionInputStream splitStream = codec.createInputStream(fs.open(infile.getPath()),
                    dcmp, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);

            // The codec may move the requested boundaries; read up to the adjusted end.
            final long adjustedStart = splitStream.getAdjustedStart();
            final long adjustedEnd = splitStream.getAdjustedEnd();

            if (LOG.isDebugEnabled()) {
                LOG.debug("Doing split " + splitCount + " on range " + " (" + start + "-" + end + ")"
                        + " adjusted to (" + adjustedStart + "-" + adjustedEnd + ")");
            }

            final LineReader lreader = new LineReader(splitStream, conf);

            if (start != 0) {
                // Not the first split so we discard the first (incomplete) line.
                int readChars = lreader.readLine(line);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("DISCARD LINE " + 0 + " in split " + splitCount + " pos=" + splitStream.getPos()
                            + " length=" + readChars + ": \"" + line + "\"");
                }
            }

            // Now read until the end of this split
            while (nextKeyValue(splitStream, lreader, adjustedEnd, line)) {
                ++splitLineNumber;

                // Get the reference value
                if (!nextKeyValue(refStream, refReader, inputLength, refLine)) {
                    LOG.error(String.format("S>%05d: %s", splitLineNumber, line));
                    fail("Split goes beyond the end of the reference with line number " + splitLineNumber);
                }
                ++refLineNumber;

                // Only log the tail of the file to keep the debug output manageable.
                if (LOG.isDebugEnabled() && refLineNumber > (recordsInFile - 10)) {
                    LOG.debug(String.format("R<%05d: %s", refLineNumber, refLine));
                    LOG.debug(String.format("S>%05d: %s", splitLineNumber, line));
                }

                assertEquals("Line must be same in reference and in split at line " + refLineNumber, refLine,
                        line);

                if (LOG.isTraceEnabled()) {
                    LOG.trace("LINE " + splitLineNumber + " in split " + splitCount + " (" + refLineNumber
                            + ") pos=" + splitStream.getPos() + " length=" + line.getLength() + ": \"" + line
                            + "\"");
                }
            }

            // We just read through the entire split
            LOG.debug("Checked split " + splitCount + " (" + adjustedStart + "-" + adjustedEnd + ") "
                    + "containing " + splitLineNumber + " lines.");

            if (end == inputLength) {
                LOG.info("====================> Finished the last split <====================");
                break; // We've reached the end of the last split
            }

            // Determine start and end for the next split
            start = end;

            if ((end + lastSplitSizeLimit) > inputLength) {
                end = inputLength;
                LOG.info("====================> Starting the last split (" + start + " - " + end
                        + ") <====================");
            } else {
                end += splitSize;
                LOG.info("====================> Starting the next split (" + start + " - " + end
                        + ") <====================");
            }

        }

        // All splits are done, so the reference must not have any lines left.
        if (nextKeyValue(refStream, refReader, inputLength, refLine)) {
            ++refLineNumber;
            LOG.error(String.format("R<%05d: %s", refLineNumber, refLine));
            fail("The reference is at least one line longer than the last split ( " + "splitSize=" + splitSize
                    + ", " + "inputLength= " + inputLength + ", " + "split start=" + start + ", " + "split end="
                    + end + ", " + "line=" + refLineNumber + ")");
        }

        LOG.info("Verified " + refLineNumber + " lines in " + splitCount + " splits.");

    } finally {
        CodecPool.returnDecompressor(dcmp);
        CodecPool.returnDecompressor(refDcmp);
    }
}

From source file:nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.NReadRecordReader.java

License:Open Source License

/**
 * Advances to the next record, which groups up to {@code NLINESTOPROCESS}
 * lines of the input into one value.
 *
 * <p>The key is set to the position at which this record starts. Each chunk
 * read from the input is appended to the value followed by a newline. Reading
 * for a line stops when nothing was consumed or when the consumed byte count
 * is below {@code maxLineLength}.
 *
 * <p>When the most recent read consumed zero bytes (or no read happened at
 * all because the position is already at the end), key and value are set to
 * {@code null} and {@code false} is returned.
 *
 * @return {@code true} if a record was produced, {@code false} otherwise
 * @throws IOException          if reading from the input fails
 * @throws InterruptedException declared by the overridden method
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {

    // Lazily create the reusable key and value holders.
    if (this.key == null) {
        this.key = new LongWritable();
    }
    if (this.value == null) {
        this.value = new Text();
    }

    this.key.set(this.pos);
    this.value.clear();

    final Text lineSeparator = new Text("\n");
    int bytesConsumed = 0;
    for (int lineIndex = 0; lineIndex < this.NLINESTOPROCESS; lineIndex++) {
        final Text lineBuffer = new Text();
        while (this.pos < this.end) {
            final int remainingInSplit = (int) Math.min(Integer.MAX_VALUE, this.end - this.pos);
            final int maxBytesToConsume = Math.max(remainingInSplit, this.maxLineLength);
            bytesConsumed = this.in.readLine(lineBuffer, this.maxLineLength, maxBytesToConsume);

            this.value.append(lineBuffer.getBytes(), 0, lineBuffer.getLength());
            this.value.append(lineSeparator.getBytes(), 0, lineSeparator.getLength());

            // Advancing by zero is a no-op, so the position can be updated before the exit checks.
            this.pos += bytesConsumed;
            if (bytesConsumed == 0 || bytesConsumed < this.maxLineLength) {
                break;
            }
        }
    }

    if (bytesConsumed != 0) {
        return true;
    }

    // Nothing was consumed by the last read: signal the end of the input.
    this.key = null;
    this.value = null;
    return false;
}

From source file:org.apache.accumulo.core.client.admin.FindMax.java

License:Apache License

/**
 * Computes a row that lies halfway between {@code minBS} and {@code maxBS}
 * when both are interpreted as unsigned big-endian numbers.
 *
 * <p>Both inputs are prefixed with a zero byte so that {@link BigInteger}
 * treats them as non-negative, and the shorter one is extended via
 * {@code appendZeros} so both have the same number of bytes. Trailing zero
 * bytes are stripped from the result for as long as it stays greater than
 * {@code minBS}.
 *
 * @param minBS the lower bound
 * @param maxBS the upper bound
 * @return the computed midpoint
 * @throws RuntimeException if the midpoint unexpectedly has a non-zero
 *         leading byte at full width
 */
private static Text findMidPoint(Text minBS, Text maxBS) {
    // A leading zero byte keeps BigInteger from reading the value as negative.
    ByteArrayOutputStream lowBytes = new ByteArrayOutputStream();
    lowBytes.write(0);
    lowBytes.write(minBS.getBytes(), 0, minBS.getLength());

    ByteArrayOutputStream highBytes = new ByteArrayOutputStream();
    highBytes.write(0);
    highBytes.write(maxBS.getBytes(), 0, maxBS.getLength());

    // Bring both numbers to the same magnitude.
    int sizeGap = highBytes.size() - lowBytes.size();
    if (sizeGap > 0) {
        appendZeros(lowBytes, sizeGap);
    } else if (sizeGap < 0) {
        appendZeros(highBytes, -sizeGap);
    }

    BigInteger low = new BigInteger(lowBytes.toByteArray());
    BigInteger high = new BigInteger(highBytes.toByteArray());

    BigInteger halfSpan = high.subtract(low).divide(BigInteger.valueOf(2));
    byte[] midBytes = halfSpan.add(low).toByteArray();

    Text midPoint = new Text();

    if (midBytes.length == lowBytes.size()) {
        if (midBytes[0] != 0) {
            throw new RuntimeException();
        }

        // BigInteger kept the sign-guard zero byte; drop it.
        midPoint.set(midBytes, 1, midBytes.length - 1);
    } else {
        // BigInteger drops leading 0x0 bytes; put them back up to the expected length.
        int expectedLength = Math.max(minBS.getLength(), maxBS.getLength());
        int missing = expectedLength - midBytes.length;
        if (missing > 0) {
            // A freshly allocated byte array is already zero-filled.
            midPoint.append(new byte[missing], 0, missing);
        }

        midPoint.append(midBytes, 0, midBytes.length);
    }

    // Remove trailing 0x0 bytes while the result stays above the lower bound.
    for (int length = midPoint.getLength();
            length > 0 && midPoint.getBytes()[length - 1] == 0 && midPoint.compareTo(minBS) > 0;
            length = midPoint.getLength()) {
        Text shortened = new Text();
        shortened.set(midPoint.getBytes(), 0, length - 1);
        midPoint = shortened;
    }

    return midPoint;
}

From source file:org.apache.accumulo.core.client.admin.FindMax.java

License:Apache License

/**
 * Finds an all-{@code 0xff} row from which the scanner returns no entries.
 *
 * <p>Starts with four {@code 0xff} bytes and doubles the length of the
 * candidate until the scanner, ranged with {@code new Range(candidate, null)},
 * has nothing to return.
 *
 * @param scanner the scanner to probe; its range is modified by this method
 * @return the first candidate for which the scanner yields no entries
 */
private static Text findInitialEnd(Scanner scanner) {
    Text candidate = new Text(new byte[] { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff });

    while (true) {
        scanner.setRange(new Range(candidate, null));
        if (!scanner.iterator().hasNext()) {
            return candidate;
        }

        // Data still exists at or beyond the candidate: double its length and retry.
        byte[] current = candidate.getBytes();
        int currentLength = candidate.getLength();
        Text doubled = new Text();
        doubled.append(current, 0, currentLength);
        doubled.append(current, 0, currentLength);
        candidate = doubled;
    }
}