Example usage for org.apache.hadoop.io Text append

List of usage examples for org.apache.hadoop.io Text append

Introduction

In this page you can find the example usage for org.apache.hadoop.io Text append.

Prototype

public void append(byte[] utf8, int start, int len) 

Source Link

Document

Append a range of bytes to the end of the given text

Usage

From source file:com.asakusafw.runtime.io.TsvParser.java

License:Apache License

/**
 * Moves the bytes accumulated in {@code encodeBuffer} onto the end of {@code text}
 * and then resets the buffer so it can be filled again.
 *
 * @param text the destination that receives the buffered bytes
 */
private void consumeEncoded(Text text) {
    // Switch the buffer from "being written" to "being read".
    encodeBuffer.flip();
    if (encodeBuffer.hasRemaining()) {
        // Text.append takes (bytes, start, length). Pass the readable byte count rather
        // than the limit, and translate the position by the backing array's offset, so
        // the call does not rely on position and array offset both being zero.
        // NOTE(review): assumes encodeBuffer is an array-backed java.nio.ByteBuffer - confirm.
        text.append(encodeBuffer.array(), encodeBuffer.arrayOffset() + encodeBuffer.position(),
                encodeBuffer.remaining());
    }
    encodeBuffer.clear();
}

From source file:com.ashishpaliwal.hadoop.utils.inputformat.CsvLineReader.java

License:Apache License

/**
 * Read from the InputStream into the given Text.
 *
 * @param txt               the object to store the given line
 * @param maxLineLength     the maximum number of bytes to store into txt.
 * @param maxBytesToConsume the maximum number of bytes to consume in this
 *                          call.
 * @return the number of bytes read including the newline
 * @throws IOException if the underlying stream throws
 */
public int readLine(Text txt, int maxLineLength, int maxBytesToConsume) throws IOException {
    // Start from an empty output; everything appended below belongs to this record.
    txt.clear();
    // Set when an unquoted '\n' terminated the record.
    boolean hadFinalNewline = false;
    // Set once an unquoted '\r' has been seen; the byte that follows decides how the record ends.
    boolean hadFinalReturn = false;
    // Set when backfill() returns false (presumably: the stream is exhausted - confirm).
    boolean hitEndOfFile = false;
    // Index in buffer of the first byte of the current pass that is not yet accounted for.
    int startPosn = bufferPosn;
    long bytesConsumed = 0;
    // True while scanning inside a double-quoted field; line breaks there do not end the record.
    boolean inQuote = false;
    // True when the previous byte was a backslash that has not been cancelled by another one.
    boolean isLastCharEscapeChar = false;

    // Each pass of this loop scans the buffered bytes from bufferPosn onward. It is left
    // via "break outerLoop" when a record terminator is found, via the plain "break"
    // when backfill() fails, or via the early return once maxBytesToConsume is reached.
    outerLoop: while (true) {
        if (bufferPosn >= bufferLength) {
            // Buffer fully scanned: ask for more data.
            // NOTE(review): backfill() is not visible here; it presumably refills buffer and
            // resets bufferPosn/bufferLength - confirm.
            if (!backfill()) {
                hitEndOfFile = true;
                break;
            }
        }

        startPosn = bufferPosn;

        for (; bufferPosn < bufferLength; ++bufferPosn) {

            switch (buffer[bufferPosn]) {

            case '\\':
                // Toggle, so that a doubled backslash cancels itself out.
                // NOTE(review): unlike the other cases, this one does not check
                // hadFinalReturn, so a backslash right after an unquoted '\r' does not end
                // the record - confirm this is intended.
                isLastCharEscapeChar = !isLastCharEscapeChar;
                break;

            case '"':
                if (!inQuote && hadFinalReturn) {
                    // An unquoted '\r' was already seen: stop here and leave this quote
                    // unconsumed (bufferPosn is not advanced).
                    break outerLoop;
                }

                // Only an unescaped quote opens or closes a quoted section.
                if (!isLastCharEscapeChar) {
                    inQuote = !inQuote;
                }
                isLastCharEscapeChar = false;
                break;

            case '\n':
                isLastCharEscapeChar = false;
                if (!inQuote) {
                    hadFinalNewline = true;
                    // Step past the '\n' so it is counted as consumed.
                    bufferPosn += 1;
                    break outerLoop;
                }
                break;

            case '\r':
                isLastCharEscapeChar = false;
                if (!inQuote) {
                    if (hadFinalReturn) {
                        // leave this \r in the stream, so we'll get it next time
                        break outerLoop;
                    }
                    hadFinalReturn = true;
                }
                break;

            default:
                isLastCharEscapeChar = false;
                if (!inQuote && hadFinalReturn) {
                    // Any other byte after an unquoted '\r': stop and leave it unconsumed.
                    break outerLoop;
                }
            }
        }

        // Reached only when the for loop ran to the end of the buffer without finding a
        // terminator: account for the scanned bytes and append them, leaving out one byte
        // when an unquoted '\r' has been seen, and never storing more than maxLineLength.
        bytesConsumed += bufferPosn - startPosn;
        int length = bufferPosn - startPosn - (hadFinalReturn ? 1 : 0);
        length = Math.min(length, maxLineLength - txt.getLength());

        // NOTE(review): this guard is ">= 0" while the matching guard after the loop is
        // "> 0"; a zero-length append is issued here - confirm whether that is intended.
        if (length >= 0)
            txt.append(buffer, startPosn, length);

        // Byte budget reached: return the count, clamped to the int range.
        if (bytesConsumed >= maxBytesToConsume)
            return (int) Math.min(bytesConsumed, (long) Integer.MAX_VALUE);
    }

    // Number of terminator bytes recorded for this record: 0, 1 or 2.
    int newlineLength = (hadFinalNewline ? 1 : 0) + (hadFinalReturn ? 1 : 0);

    if (!hitEndOfFile) {
        // The loop ended on a terminator inside the buffer: account for the bytes scanned
        // in this last pass and append them without the terminator bytes.
        bytesConsumed += bufferPosn - startPosn;
        int length = bufferPosn - startPosn - newlineLength;
        length = Math.min(length, maxLineLength - txt.getLength());

        if (length > 0)
            txt.append(buffer, startPosn, length);
    }
    // Clamp the long counter to the int range for the return value.
    return (int) Math.min(bytesConsumed, (long) Integer.MAX_VALUE);
}

From source file:com.blm.orc.DynamicByteArray.java

License:Apache License

/**
 * Set a text value from the bytes in this dynamic array.
 * @param result the value to set/*w w  w .j  a  va 2s  .  c o m*/
 * @param offset the start of the bytes to copy
 * @param length the number of bytes to copy
 */
public void setText(Text result, int offset, int length) {
    result.clear();
    int currentChunk = offset / chunkSize;
    int currentOffset = offset % chunkSize;
    int currentLength = Math.min(length, chunkSize - currentOffset);
    while (length > 0) {
        result.append(data[currentChunk], currentOffset, currentLength);
        length -= currentLength;
        currentChunk += 1;
        currentOffset = 0;
        currentLength = Math.min(length, chunkSize - currentOffset);
    }
}

From source file:com.dinglicom.clouder.mapreduce.input.LineReader.java

License:Apache License

/**
 * Reads one line ended by CR, LF or CRLF into {@code str}.
 *
 * <p>Data may already sit in {@code buffer} from an earlier call, so scanning resumes at
 * {@code bufferPosn}. A CR that is the last byte of the buffer is ambiguous: it is left
 * out of the counts until the next buffer shows whether an LF follows it.
 *
 * @param str               receives the line content without its terminator
 * @param maxLineLength     the maximum number of bytes stored into {@code str}
 * @param maxBytesToConsume scanning stops after the buffer pass in which this many bytes
 *                          have been consumed
 * @return the number of bytes consumed, terminator included
 * @throws IOException if reading fails or the count exceeds {@code Integer.MAX_VALUE}
 */
private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    str.clear();
    long consumed = 0;
    int stored = 0; // mirrors str.getLength() without calling it
    int terminatorSize = 0; // 0 until a line ending has been identified
    boolean sawCR = false; // whether the previously scanned byte was CR
    while (true) {
        int from = bufferPosn; // resume where the previous pass or call stopped
        if (bufferPosn >= bufferLength) {
            bufferPosn = 0;
            from = 0;
            if (sawCR) {
                consumed++; // the CR held back at the end of the previous buffer
            }
            bufferLength = in.read(buffer);
            if (bufferLength <= 0) {
                break; // EOF
            }
        }
        while (bufferPosn < bufferLength) {
            final byte current = buffer[bufferPosn];
            if (current == LF) {
                terminatorSize = sawCR ? 2 : 1;
                bufferPosn++; // the next call starts after the LF
                break;
            }
            if (sawCR) {
                // CR followed by something other than LF: the CR alone ended the line
                terminatorSize = 1;
                break;
            }
            sawCR = (current == CR);
            bufferPosn++;
        }
        int scanned = bufferPosn - from;
        if (terminatorSize == 0 && sawCR) {
            scanned--; // trailing CR: decide once the next byte is known
        }
        consumed += scanned;
        int toStore = Math.min(scanned - terminatorSize, maxLineLength - stored);
        if (toStore > 0) {
            str.append(buffer, from, toStore);
            stored += toStore;
        }
        if (terminatorSize != 0 || consumed >= maxBytesToConsume) {
            break;
        }
    }

    if (consumed > (long) Integer.MAX_VALUE) {
        throw new IOException("Too many bytes before newline: " + consumed);
    }
    return (int) consumed;
}

From source file:com.dinglicom.clouder.mapreduce.input.LineReader.java

License:Apache License

/**
 * Read a line terminated by a custom delimiter.
 *
 * <p>Bytes that match the beginning of {@code recordDelimiterBytes} are withheld from
 * {@code str} until the match either completes (they were the delimiter) or fails (they
 * were data). When a match fails, scanning resumes from the longest delimiter prefix
 * that still fits the bytes seen, so overlapping candidates such as {@code "aab"} with
 * delimiter {@code "ab"} are detected. Withheld bytes that turn out to be data are
 * appended, including when they were read in an earlier buffer or when the input ends
 * in the middle of a possible delimiter.
 *
 * @param str               receives the record content without the delimiter
 * @param maxLineLength     the maximum number of bytes stored into {@code str}
 * @param maxBytesToConsume scanning stops after the buffer pass in which this many bytes
 *                          have been consumed
 * @return the number of bytes consumed, delimiter included
 * @throws IOException if reading fails or the count exceeds {@code Integer.MAX_VALUE}
 */
private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    str.clear();
    int txtLength = 0; // tracks str.getLength(), as an optimization
    long bytesConsumed = 0;
    int delPosn = 0; // number of leading delimiter bytes currently matched
    do {
        int startPosn = bufferPosn; // starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            startPosn = bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0) {
                break; // EOF
            }
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) {
            byte current = buffer[bufferPosn];
            if (current == recordDelimiterBytes[delPosn]) {
                delPosn++;
                if (delPosn >= recordDelimiterBytes.length) {
                    bufferPosn++;
                    break;
                }
            } else if (delPosn > 0) {
                // The partial match failed at this byte. Positions are relative to the
                // current buffer; a match that began in an earlier buffer starts below
                // startPosn.
                int matchStart = bufferPosn - delPosn;
                int resumed = delimiterPrefixAfterMismatch(delPosn, current);
                if (matchStart < startPosn) {
                    // Some withheld bytes are no longer in the buffer. They equal the
                    // leading delimiter bytes, so re-create the ones that are now known
                    // to be data from the delimiter itself.
                    int newMatchStart = bufferPosn + 1 - resumed;
                    int released = Math.min(newMatchStart, startPosn) - matchStart;
                    int releaseLength = Math.min(released, maxLineLength - txtLength);
                    if (releaseLength > 0) {
                        str.append(recordDelimiterBytes, 0, releaseLength);
                        txtLength += releaseLength;
                    }
                }
                delPosn = resumed;
            }
        }
        int readLength = bufferPosn - startPosn;
        bytesConsumed += readLength;
        // Leave out the (partially or fully) matched delimiter bytes at the end; the
        // result is negative when the match began in an earlier buffer.
        int appendLength = readLength - delPosn;
        if (appendLength > maxLineLength - txtLength) {
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume);
    if (delPosn > 0 && delPosn < recordDelimiterBytes.length) {
        // Scanning stopped in the middle of a possible delimiter: the withheld bytes
        // were already counted as consumed and are data, so store them.
        int tailLength = Math.min(delPosn, maxLineLength - txtLength);
        if (tailLength > 0) {
            str.append(recordDelimiterBytes, 0, tailLength);
        }
    }
    if (bytesConsumed > (long) Integer.MAX_VALUE) {
        throw new IOException("Too many bytes before delimiter: " + bytesConsumed);
    }
    return (int) bytesConsumed;
}

/**
 * Returns the length of the longest delimiter prefix that is a suffix of the first
 * {@code matched} delimiter bytes followed by {@code next}, or 0 if there is none.
 */
private int delimiterPrefixAfterMismatch(int matched, byte next) {
    for (int candidate = matched; candidate > 0; candidate--) {
        if (recordDelimiterBytes[candidate - 1] != next) {
            continue;
        }
        // The candidate's last byte fits; compare the bytes before it with the tail of
        // the matched prefix.
        int shift = matched - candidate + 1;
        boolean fits = true;
        for (int i = 0; i < candidate - 1; i++) {
            if (recordDelimiterBytes[i] != recordDelimiterBytes[shift + i]) {
                fits = false;
                break;
            }
        }
        if (fits) {
            return candidate;
        }
    }
    return 0;
}

From source file:com.ery.hadoop.mrddx.file.LineReaders.java

License:Apache License

/**
 * Reads the next line, where a line ends with CR, LF or CRLF, and stores it in
 * {@code str} without the line ending.
 *
 * <p>The start of the stream may already be buffered, so three situations arise: the
 * buffer holds no line ending (copy everything and read more), the buffer holds a
 * complete line ending (copy up to it), or the buffer ends in CR (copy up to the CR and
 * look at the first byte of the next buffer to see whether an LF has to be consumed too).
 *
 * @param str               the text that receives the line
 * @param maxLineLength     upper bound on the number of bytes stored into {@code str}
 * @param maxBytesToConsume no further buffer is scanned once this many bytes are consumed
 * @return the number of bytes consumed, line ending included
 * @throws IOException if reading fails or the count exceeds {@code Integer.MAX_VALUE}
 */
private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    str.clear();
    int keptBytes = 0; // same value as str.getLength(), kept locally
    int eolLength = 0; // length of the line ending once one is found
    boolean pendingCR = false; // the byte scanned last was CR
    long total = 0;
    for (;;) {
        int begin = bufferPosn; // continue after the bytes handled previously
        if (bufferPosn >= bufferLength) {
            begin = 0;
            bufferPosn = 0;
            if (pendingCR) {
                total += 1; // count the CR that ended the previous buffer
            }
            bufferLength = in.read(buffer);
            if (bufferLength <= 0) {
                break; // EOF
            }
        }
        while (bufferPosn < bufferLength) {
            if (buffer[bufferPosn] == LF) {
                if (pendingCR) {
                    eolLength = 2;
                } else {
                    eolLength = 1;
                }
                bufferPosn += 1; // the following byte starts the next line
                break;
            }
            if (pendingCR) {
                eolLength = 1; // lone CR; the current byte belongs to the next line
                break;
            }
            pendingCR = (buffer[bufferPosn] == CR);
            bufferPosn += 1;
        }
        int span = bufferPosn - begin;
        if (pendingCR && eolLength == 0) {
            span -= 1; // hold back a CR that ends the buffer
        }
        total += span;
        int room = maxLineLength - keptBytes;
        int copyLength = span - eolLength;
        if (copyLength > room) {
            copyLength = room;
        }
        if (copyLength > 0) {
            str.append(buffer, begin, copyLength);
            keptBytes += copyLength;
        }
        if (!(eolLength == 0 && total < maxBytesToConsume)) {
            break;
        }
    }

    if (total > (long) Integer.MAX_VALUE) {
        throw new IOException("Too many bytes before newline: " + total);
    }
    return (int) total;
}

From source file:com.ery.hadoop.mrddx.file.LineReaders.java

License:Apache License

/**
 * Read a line terminated by a custom delimiter.
 *
 * <p>Bytes that match the beginning of {@code recordDelimiterBytes} are withheld from
 * {@code str} until the match either completes (they were the delimiter) or fails (they
 * were data). When a match fails, scanning resumes from the longest delimiter prefix
 * that still fits the bytes seen, so overlapping candidates such as {@code "aab"} with
 * delimiter {@code "ab"} are detected. Withheld bytes that turn out to be data are
 * appended, including when they were read in an earlier buffer or when the input ends
 * in the middle of a possible delimiter.
 *
 * @param str               receives the record content without the delimiter
 * @param maxLineLength     the maximum number of bytes stored into {@code str}
 * @param maxBytesToConsume scanning stops after the buffer pass in which this many bytes
 *                          have been consumed
 * @return the number of bytes consumed, delimiter included
 * @throws IOException if reading fails or the count exceeds {@code Integer.MAX_VALUE}
 */
private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    str.clear();
    int txtLength = 0; // tracks str.getLength(), as an optimization
    long bytesConsumed = 0;
    int delPosn = 0; // number of leading delimiter bytes currently matched
    do {
        int startPosn = bufferPosn; // starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            startPosn = bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0) {
                break; // EOF
            }
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) {
            byte current = buffer[bufferPosn];
            if (current == recordDelimiterBytes[delPosn]) {
                delPosn++;
                if (delPosn >= recordDelimiterBytes.length) {
                    bufferPosn++;
                    break;
                }
            } else if (delPosn > 0) {
                // The partial match failed at this byte. Positions are relative to the
                // current buffer; a match that began in an earlier buffer starts below
                // startPosn.
                int matchStart = bufferPosn - delPosn;
                int resumed = delimiterPrefixAfterMismatch(delPosn, current);
                if (matchStart < startPosn) {
                    // Some withheld bytes are no longer in the buffer. They equal the
                    // leading delimiter bytes, so re-create the ones that are now known
                    // to be data from the delimiter itself.
                    int newMatchStart = bufferPosn + 1 - resumed;
                    int released = Math.min(newMatchStart, startPosn) - matchStart;
                    int releaseLength = Math.min(released, maxLineLength - txtLength);
                    if (releaseLength > 0) {
                        str.append(recordDelimiterBytes, 0, releaseLength);
                        txtLength += releaseLength;
                    }
                }
                delPosn = resumed;
            }
        }
        int readLength = bufferPosn - startPosn;
        bytesConsumed += readLength;
        // Leave out the (partially or fully) matched delimiter bytes at the end; the
        // result is negative when the match began in an earlier buffer.
        int appendLength = readLength - delPosn;
        if (appendLength > maxLineLength - txtLength) {
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume);
    if (delPosn > 0 && delPosn < recordDelimiterBytes.length) {
        // Scanning stopped in the middle of a possible delimiter: the withheld bytes
        // were already counted as consumed and are data, so store them.
        int tailLength = Math.min(delPosn, maxLineLength - txtLength);
        if (tailLength > 0) {
            str.append(recordDelimiterBytes, 0, tailLength);
        }
    }
    if (bytesConsumed > (long) Integer.MAX_VALUE) {
        throw new IOException("Too many bytes before delimiter: " + bytesConsumed);
    }
    return (int) bytesConsumed;
}

/**
 * Returns the length of the longest delimiter prefix that is a suffix of the first
 * {@code matched} delimiter bytes followed by {@code next}, or 0 if there is none.
 */
private int delimiterPrefixAfterMismatch(int matched, byte next) {
    for (int candidate = matched; candidate > 0; candidate--) {
        if (recordDelimiterBytes[candidate - 1] != next) {
            continue;
        }
        // The candidate's last byte fits; compare the bytes before it with the tail of
        // the matched prefix.
        int shift = matched - candidate + 1;
        boolean fits = true;
        for (int i = 0; i < candidate - 1; i++) {
            if (recordDelimiterBytes[i] != recordDelimiterBytes[shift + i]) {
                fits = false;
                break;
            }
        }
        if (fits) {
            return candidate;
        }
    }
    return 0;
}

From source file:com.kasabi.labs.freebase.mr.Freebase2RDFMapper.java

License:Apache License

/**
 * Appends the whole content of {@code bytes} to the end of {@code text}.
 *
 * @param text  the text to extend
 * @param bytes the bytes to add, used from index 0 through the last element
 */
private void append(Text text, byte[] bytes) {
    final int count = bytes.length;
    text.append(bytes, 0, count);
}

From source file:com.kasabi.labs.freebase.mr.Freebase2RDFMapper.java

License:Apache License

/**
 * Appends the UTF-8 encoding of {@code str} to the end of {@code text}.
 *
 * @param text the text to extend
 * @param str  the string whose UTF-8 bytes are added
 * @throws UnsupportedEncodingException never thrown by this implementation; the clause
 *         is kept so that existing callers continue to compile
 */
private void append(Text text, String str) throws UnsupportedEncodingException {
    // Use the charset constant instead of a lookup by name: no lookup at run time and
    // no possibility of an unsupported-encoding failure.
    byte[] bytes = str.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    text.append(bytes, 0, bytes.length);
}

From source file:com.ricemap.spateDB.core.GridInfo.java

License:Apache License

/**
 * Writes this object to {@code text}: first whatever the superclass writes, then a
 * comma, then {@code layers}, {@code columns} and {@code rows} through
 * {@code TextSerializerHelper.serializeLong}.
 *
 * @param text the text to append to
 * @return the same {@code text} instance that was passed in
 */
@Override
public Text toText(Text text) {
    // Encode the separator with a fixed charset; the no-argument getBytes() depends on
    // the platform default charset.
    final byte[] comma = ",".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    super.toText(text);
    text.append(comma, 0, comma.length);
    // NOTE(review): the last argument is presumably the separator written after the
    // value, with '\0' meaning "none" - confirm against TextSerializerHelper.
    TextSerializerHelper.serializeLong(layers, text, ',');
    TextSerializerHelper.serializeLong(columns, text, ',');
    TextSerializerHelper.serializeLong(rows, text, '\0');
    return text;
}