Example usage for org.apache.hadoop.io Text copyBytes

List of usage examples for org.apache.hadoop.io Text copyBytes

Introduction

On this page you can find example usages of the copyBytes method of org.apache.hadoop.io.Text.

Prototype

public byte[] copyBytes() 

Source Link

Document

Get a copy of the bytes that is exactly the length of the data.

Usage

From source file:mapred.io.CustomRecordReader.java

License:Apache License

/**
 * Reads one line from {@code in} into a method-local {@link Text}, advances
 * {@code pos} by the number of bytes reported by {@code readLine}, and strips a
 * leading UTF-8 byte order mark (0xEF, 0xBB, 0xBF) from that local text if present.
 *
 * <p>The text that was read is not returned to the caller; only the byte count is.
 *
 * @return the size reported by {@code readLine}, reduced by 3 when a BOM was found
 * @throws IOException if reading the line fails
 */
private int skipUtfByteOrderMark() throws IOException {
    final Text firstLine = new Text();

    // Text only supports UTF-8, so the UTF-8 BOM is the only one worth checking for.
    // Allow three extra bytes for the BOM; the long arithmetic plus clamp keeps the
    // limit from overflowing int.
    final int lineLimit = (int) Math.min(3L + (long) maxLineLength, Integer.MAX_VALUE);
    final int bytesRead = in.readLine(firstLine, lineLimit, maxBytesToConsume(pos));

    // Per the original author's note: reading 3 extra bytes for the first line is
    // not expected to change existing behavior, because a line whose reported size
    // is not less than maxLineLength is discarded by the caller and the next line
    // is read instead.
    // The position advances by the full amount read, BOM included.
    pos += bytesRead;

    final int length = firstLine.getLength();
    final byte[] raw = firstLine.getBytes();
    final boolean startsWithBom = length >= 3
            && raw[0] == (byte) 0xEF
            && raw[1] == (byte) 0xBB
            && raw[2] == (byte) 0xBF;
    if (!startsWithBom) {
        return bytesRead;
    }

    LOG.info("Found UTF-8 BOM and skipped it");
    final int remaining = length - 3;
    if (remaining > 0) {
        // Work from a private copy of the bytes rather than the backing buffer
        // (the original author notes that reusing the buffer may also work).
        final byte[] snapshot = firstLine.copyBytes();
        firstLine.set(snapshot, 3, remaining);
    } else {
        // The line consisted of nothing but the BOM.
        firstLine.clear();
    }
    return bytesRead - 3;
}

From source file:mr.MyFileRecordReader2.java

License:Apache License

/**
 * Reads one line from {@code in} into {@code value}, advances {@code pos} by the
 * number of bytes reported by {@code readLine}, and removes a leading UTF-8 byte
 * order mark (0xEF, 0xBB, 0xBF) from {@code value} if one is present.
 *
 * @param value destination for the line that is read; modified in place
 * @return the size reported by {@code readLine}, reduced by 3 when a BOM was found
 * @throws IOException if reading the line fails
 */
private int skipUtfByteOrderMark(Text value) throws IOException {
    // Text only supports UTF-8, so only the UTF-8 BOM needs to be detected.
    // Three extra bytes are allowed for the BOM; computing in long and clamping
    // prevents int overflow.
    final long widenedLimit = 3L + (long) maxLineLength;
    final int lineLimit = (int) Math.min(widenedLimit, Integer.MAX_VALUE);

    int consumed = in.readLine(value, lineLimit, maxBytesToConsume(pos));

    // Per the original author's note: the 3 extra bytes are not expected to alter
    // existing behavior, since a line whose reported size is not less than
    // maxLineLength is discarded by the caller and the next line is read.
    // The stream position accounts for every byte read, including any BOM.
    pos += consumed;

    final int length = value.getLength();
    final byte[] raw = value.getBytes();
    if (length < 3
            || raw[0] != (byte) 0xEF
            || raw[1] != (byte) 0xBB
            || raw[2] != (byte) 0xBF) {
        // No BOM at the start of the line: nothing to strip.
        return consumed;
    }

    LOG.info("Found UTF-8 BOM and skipped it");
    consumed -= 3;
    final int payloadLength = length - 3;
    if (payloadLength > 0) {
        // Take a copy of the bytes before writing the BOM-less slice back into
        // the same Text (the original author notes reusing the buffer may also work).
        final byte[] copied = value.copyBytes();
        value.set(copied, 3, payloadLength);
    } else {
        // Nothing follows the BOM on this line.
        value.clear();
    }
    return consumed;
}

From source file:mvm.rya.indexing.accumulo.temporal.AccumuloTemporalIndexerTest.java

License:Apache License

/**
 * Renders a {@link Text} by delegating to the {@code byte[]} overload of
 * {@code toHumanString} with a copy of the text's bytes.
 *
 * @param text the text to render; may be {@code null}, in which case {@code null}
 *             is passed to the overload
 * @return whatever the {@code byte[]} overload returns
 */
static String toHumanString(Text text) {
    // Declared as byte[] so the same overload is selected for the null case.
    byte[] bytes = null;
    if (text != null) {
        bytes = text.copyBytes();
    }
    return toHumanString(bytes);
}

From source file:mvm.rya.indexing.KeyParts.java

License:Apache License

/**
 * Converts a {@link Text} to a string by handing a copy of its bytes to the
 * {@code byte[]} overload of {@code toHumanString}.
 *
 * @param text the text to convert; a {@code null} value is forwarded as a
 *             {@code null} byte array
 * @return the result of the {@code byte[]} overload
 */
public static String toHumanString(Text text) {
    final byte[] content;
    if (text == null) {
        content = null;
    } else {
        content = text.copyBytes();
    }
    return toHumanString(content);
}

From source file:org.apache.accumulo.core.data.LoadPlan.java

License:Apache License

/**
 * Returns a copy of the bytes held by the given {@link Text}.
 *
 * @param data the text to copy from; may be {@code null}
 * @return the result of {@code data.copyBytes()}, or {@code null} when
 *         {@code data} is {@code null}
 */
private static byte[] copy(Text data) {
    if (data == null) {
        return null;
    }
    return data.copyBytes();
}

From source file:org.apache.rya.indexing.KeyParts.java

License:Apache License

/**
 * Produces a string for a {@link Text} by passing a copy of its bytes to the
 * {@code byte[]} overload of {@code toHumanString}.
 *
 * @param text the text to convert; when {@code null}, a {@code null} byte array
 *             is passed on
 * @return the value returned by the {@code byte[]} overload
 */
public static String toHumanString(final Text text) {
    if (text == null) {
        // The cast keeps the call bound to the byte[] overload.
        return toHumanString((byte[]) null);
    }
    final byte[] copied = text.copyBytes();
    return toHumanString(copied);
}

From source file:org.utils.UnsplittableFileReader.java

License:Apache License

/**
 * Advances the wrapped {@code reader} and, when it yields a record, repopulates
 * this reader's {@code key} (filename plus line value from the wrapped key) and
 * {@code value} (a copy of the wrapped value's bytes).
 *
 * @return the result of the wrapped reader's {@code nextKeyValue()}
 * @throws IOException if the wrapped reader fails
 */
@Override
public synchronized boolean nextKeyValue() throws IOException {
    final boolean advanced = reader.nextKeyValue();
    if (!advanced) {
        // Nothing more to read; leave key and value untouched.
        return false;
    }

    final LongWritable sourceKey = reader.getCurrentKey();
    final Text sourceLine = reader.getCurrentValue();

    // Rebuild the key from the filename and the wrapped reader's key.
    key.clear();
    key.setFilename(filename);
    key.setLine(sourceKey.get());

    // Store a copy of the line's bytes rather than the wrapped reader's Text.
    final byte[] lineBytes = sourceLine.copyBytes();
    value.clear();
    value.set(lineBytes);

    LOG.debug("read " + key);
    return true;
}