Example usage for org.apache.hadoop.io Text append

List of usage examples for org.apache.hadoop.io Text append

Introduction

This page collects example usages of the org.apache.hadoop.io.Text.append method.

Prototype

public void append(byte[] utf8, int start, int len) 

Source Link

Document

Append a range of bytes to the end of the given text

Usage

From source file:com.ricemap.spateDB.core.Partition.java

License:Apache License

/**
 * Serializes this object into the given text by first delegating to the
 * superclass and then appending a comma followed by {@code filename}.
 *
 * @param text the text to append to
 * @return the same {@code text} instance, to allow call chaining
 */
@Override
public Text toText(Text text) {
    super.toText(text);
    // Text stores UTF-8 bytes, so encode explicitly instead of relying on
    // the platform default charset.
    byte[] suffix = ("," + filename).getBytes(java.nio.charset.StandardCharsets.UTF_8);
    text.append(suffix, 0, suffix.length);
    return text;
}

From source file:com.ricemap.spateDB.io.Text2.java

License:Apache License

/**
 * Appends the first {@code getLength()} bytes of this object's backing
 * array to the given text.
 *
 * @param text the text to append to
 * @return the same {@code text} instance that was passed in
 */
@Override
public Text toText(Text text) {
    final byte[] content = getBytes();
    final int contentLength = getLength();
    text.append(content, 0, contentLength);
    return text;
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Appends hex representation of the given number to the given string.
 * If append is set to true, a comma is also appended to the text.
 * @param i/*from   w ww .  ja v  a2  s  . c om*/
 * @param t
 * @param appendComma
 */
public static void serializeHexLong(long i, Text t, char toAppend) {
    // Calculate number of bytes needed to serialize the given long
    int bytes_needed = 0;
    long temp;
    if (i < 0) {
        bytes_needed++; // An additional
        temp = -i;
    } else {
        temp = i;
    }
    do {
        bytes_needed += 1;
        temp >>>= 4;
    } while (temp != 0);

    if (toAppend != '\0')
        bytes_needed++;

    // Reserve the bytes needed in the text
    t.append(ToAppend, 0, bytes_needed);
    // Extract the underlying buffer array and fill it directly
    byte[] buffer = t.getBytes();
    // Position of the next character to write in the text
    int position = t.getLength() - 1;

    if (toAppend != '\0')
        buffer[position--] = (byte) toAppend;

    final int shift = 4;
    final int radix = 1 << shift;
    final long mask = radix - 1;

    // Negative sign is prepended separately for negative numbers
    boolean negative = false;
    if (i < 0) {
        i = -i;
        negative = true;
    }
    do {
        buffer[position--] = digits[(int) (i & mask)];
        i >>>= shift;
    } while (i != 0);
    if (negative)
        buffer[position--] = '-';
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Appends hex representation of the given number to the given string.
 * If append is set to true, a comma is also appended to the text.
 * @param i/*  www  . j  a  va 2  s .c  om*/
 * @param t
 * @param appendComma
 */
public static void serializeDouble(double d, Text t, char toAppend) {
    byte[] bytes = Double.toString(d).getBytes();
    t.append(bytes, 0, bytes.length);
    if (toAppend != '\0') {
        t.append(new byte[] { (byte) toAppend }, 0, 1);
    }
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Appends the decimal representation of the given number to the given text.
 * A negative number is prefixed with a minus sign. If {@code toAppend} is
 * not the null character, it is written immediately after the number.
 *
 * @param i the number to serialize
 * @param t the text to append to
 * @param toAppend a character to write after the number, or {@code '\0'} for none
 */
public static void serializeLong(long i, Text t, char toAppend) {
    final boolean negative = i < 0;
    final boolean hasSuffix = toAppend != '\0';
    // Work on the non-positive form of the value. Unlike -i, this never
    // overflows, so Long.MIN_VALUE is handled like any other number.
    long nonPositive = negative ? i : -i;

    // Count the bytes needed: optional sign, one per digit, optional suffix
    int bytesNeeded = negative ? 1 : 0;
    long remaining = nonPositive;
    do {
        bytesNeeded++;
        remaining /= 10;
    } while (remaining != 0);
    if (hasSuffix) {
        bytesNeeded++;
    }

    // Reserve the bytes needed in the text. The content appended here is
    // irrelevant because every reserved byte is overwritten below.
    // NOTE(review): assumes ToAppend holds at least bytesNeeded bytes - confirm.
    t.append(ToAppend, 0, bytesNeeded);
    // Extract the underlying buffer array and fill it directly, back to front
    byte[] buffer = t.getBytes();
    int position = t.getLength() - 1;

    if (hasSuffix) {
        buffer[position--] = (byte) toAppend;
    }

    do {
        // The remainder of a non-positive value lies in [-9, 0]; negate it
        // to obtain a valid index into the digits table.
        int digit = (int) -(nonPositive % 10);
        buffer[position--] = digits[digit];
        nonPositive /= 10;
    } while (nonPositive != 0);

    // The sign occupies the first reserved byte
    if (negative) {
        buffer[position] = '-';
    }
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Appends the decimal representation of the given int to the given text.
 * A negative number is prefixed with a minus sign. If {@code toAppend} is
 * not the null character, it is written immediately after the number.
 *
 * @param i the number to serialize
 * @param t the text to append to
 * @param toAppend a character to write after the number, or {@code '\0'} for none
 */
public static void serializeInt(int i, Text t, char toAppend) {
    final boolean negative = i < 0;
    final boolean hasSuffix = toAppend != '\0';
    // Work on the non-positive form of the value. Unlike -i, this never
    // overflows, so Integer.MIN_VALUE is handled like any other number.
    int nonPositive = negative ? i : -i;

    // Count the bytes needed: optional sign, one per digit, optional suffix
    int bytesNeeded = negative ? 1 : 0;
    int remaining = nonPositive;
    do {
        bytesNeeded++;
        remaining /= 10;
    } while (remaining != 0);
    if (hasSuffix) {
        bytesNeeded++;
    }

    // Reserve the bytes needed in the text. The content appended here is
    // irrelevant because every reserved byte is overwritten below.
    // NOTE(review): assumes ToAppend holds at least bytesNeeded bytes - confirm.
    t.append(ToAppend, 0, bytesNeeded);
    // Extract the underlying buffer array and fill it directly, back to front
    byte[] buffer = t.getBytes();
    int position = t.getLength() - 1;

    if (hasSuffix) {
        buffer[position--] = (byte) toAppend;
    }

    do {
        // The remainder of a non-positive value lies in [-9, 0]; negate it
        // to obtain a valid index into the digits table.
        int digit = -(nonPositive % 10);
        buffer[position--] = digits[digit];
        nonPositive /= 10;
    } while (nonPositive != 0);

    // The sign occupies the first reserved byte
    if (negative) {
        buffer[position] = '-';
    }
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Appends the given map to the given text. The output is the byte at
 * {@code Separators[MapStart]}, then each entry as key,
 * {@code Separators[KeyValueSeparator]}, value, with
 * {@code Separators[FieldSeparator]} between consecutive entries, and
 * finally {@code Separators[MapEnd]}. An empty map appends nothing.
 *
 * @param text the text to append to
 * @param tags the entries to serialize, written in the map's iteration order
 * @return the same {@code text} instance that was passed in
 */
public static Text serializeMap(Text text, Map<String, String> tags) {
    if (tags.isEmpty()) {
        return text;
    }
    text.append(Separators, MapStart, 1);
    boolean first = true;
    for (Map.Entry<String, String> entry : tags.entrySet()) {
        if (first) {
            // No separator before the first entry; the flag must stay false
            // afterwards so that every later entry is preceded by one.
            first = false;
        } else {
            text.append(Separators, FieldSeparator, 1);
        }
        // Text stores UTF-8 bytes, so encode explicitly instead of relying
        // on the platform default charset.
        byte[] k = entry.getKey().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        text.append(k, 0, k.length);
        text.append(Separators, KeyValueSeparator, 1);
        byte[] v = entry.getValue().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        text.append(v, 0, v.length);
    }
    text.append(Separators, MapEnd, 1);
    return text;
}

From source file:com.ricemap.spateDB.mapred.SpatialRecordReader.java

License:Apache License

/**
 * Reads the next line from input and returns true if a line was read.
 * If no more lines are available in this split, false is returned.
 * Bytes pending in {@code buffer} are consumed before anything is read
 * from the underlying line reader.
 *
 * @param value the text that receives the line; it is cleared first
 * @return true if a line was read, false at the end of the split or stream
 * @throws IOException if reading from the underlying stream fails
 */
protected boolean nextLine(Text value) throws IOException {
    if (blockType == BlockType.RTREE && pos == 8) {
        // File is positioned at the RTree header
        // Skip the header and go to first data object in file
        pos += RTree.skipHeader(in);
        LOG.info("Skipped R-tree to position: " + pos);
        // Reinitialize record reader at the new position
        lineReader = new LineReader(in);
    }
    while (getFilePosition() <= end) {
        value.clear();
        int b = 0;
        if (buffer != null) {
            // Read the first line encountered in buffer
            int eol = RTree.skipToEOL(buffer, 0);
            b += eol;
            value.append(buffer, 0, eol);
            if (eol < buffer.length) {
                // There are still some bytes remaining in buffer; keep only
                // the unread tail so the next call does not see this line again
                byte[] tmp = new byte[buffer.length - eol];
                System.arraycopy(buffer, eol, tmp, 0, tmp.length);
                buffer = tmp;
            } else {
                buffer = null;
            }
            // Check if a complete line has been read from the buffer.
            // Guarded so that an empty extraction does not index position -1.
            if (value.getLength() > 0) {
                byte last_byte = value.getBytes()[value.getLength() - 1];
                if (last_byte == '\n' || last_byte == '\r') {
                    return true;
                }
            }
        }

        // Read the first line from stream
        Text temp = new Text();
        b += lineReader.readLine(temp);
        if (b == 0) {
            // Indicates an end of stream
            return false;
        }
        pos += b;

        // Append the part read from stream to the part extracted from buffer
        value.append(temp.getBytes(), 0, temp.getLength());

        if (value.getLength() > 1) {
            // Read a non-empty line. Note that end-of-line character is included
            return true;
        }
    }
    // Reached end of file
    return false;
}

From source file:com.ricemap.spateDB.operations.Tail.java

License:Apache License

/**
 * Reads a maximum of n lines from the stream starting from its current
 * position and going backward./*w w  w .  ja  v a  2 s  .c om*/
 * 
 * @param in - An input stream. It'll be scanned from its current position
 *   backward till position 0
 * @param n - Maximum number of lines to return
 * @param stockObject - An object used to deserialize lines read. It can
 *   be set to <code>null</code> if output is also <code>null</code>. In this
 *   case, nothing is reported to the output.
 * @param output - An output collector used to report lines read.
 * @return - The position of the beginning of the earliest line read from
 *   buffer.
 * @throws IOException
 */
public static <T extends TextSerializable> long tail(FSDataInputStream in, int n, T stockObject,
        ResultCollector<T> output) throws IOException {
    int lines_read = 0;
    long end = in.getPos();
    long offset_of_last_eol = end;
    long last_read_byte = end;

    LongWritable line_offset = new LongWritable();
    Text read_line = new Text();
    Text remainder_from_last_buffer = new Text();
    byte[] buffer = new byte[4096];

    while (last_read_byte > 0 && lines_read < n) {
        // Read next chunk from the back
        long first_byte_to_read = (last_read_byte - 1) - (last_read_byte - 1) % buffer.length;
        in.seek(first_byte_to_read);
        int bytes_to_read = (int) (last_read_byte - first_byte_to_read);
        in.read(buffer, 0, bytes_to_read);
        last_read_byte = first_byte_to_read;

        // Iterate over bytes in this buffer
        int i_last_byte_consumed_in_buffer = bytes_to_read;
        int i_last_byte_examined_in_buffer = bytes_to_read;
        while (i_last_byte_examined_in_buffer > 0 && lines_read < n) {
            byte byte_examined = buffer[--i_last_byte_examined_in_buffer];
            if (byte_examined == '\n' || byte_examined == '\r') {
                // Found an end of line character
                // Report this to output unless it's empty
                long offset_of_this_eol = first_byte_to_read + i_last_byte_examined_in_buffer;
                if (offset_of_last_eol - offset_of_this_eol > 1) {
                    if (output != null) {
                        read_line.clear();
                        // +1 is to skip the EOL at the beginning
                        read_line.append(buffer, i_last_byte_examined_in_buffer + 1,
                                i_last_byte_consumed_in_buffer - (i_last_byte_examined_in_buffer + 1));
                        // Also append bytes remaining from last buffer
                        if (remainder_from_last_buffer.getLength() > 0) {
                            read_line.append(remainder_from_last_buffer.getBytes(), 0,
                                    remainder_from_last_buffer.getLength());
                        }
                        line_offset.set(offset_of_this_eol + 1);
                        stockObject.fromText(read_line);
                        output.collect(stockObject);
                    }
                    lines_read++;
                    remainder_from_last_buffer.clear();
                }
                i_last_byte_consumed_in_buffer = i_last_byte_examined_in_buffer;
                offset_of_last_eol = offset_of_this_eol;
            }
        }
        if (i_last_byte_consumed_in_buffer > 0) {
            // There are still some bytes not consumed in buffer
            if (remainder_from_last_buffer.getLength() == 0) {
                // Store whatever is remaining in remainder_from_last_buffer
                remainder_from_last_buffer.append(buffer, 0, i_last_byte_consumed_in_buffer);
            } else {
                // Prepend remaining bytes to Text
                Text t = new Text();
                t.append(buffer, 0, i_last_byte_consumed_in_buffer);
                t.append(remainder_from_last_buffer.getBytes(), 0, remainder_from_last_buffer.getLength());
                remainder_from_last_buffer = t;
            }
        }
    }

    if (lines_read < n && remainder_from_last_buffer.getLength() > 0) {
        // There is still one last line needs to be reported
        lines_read++;
        if (output != null) {
            read_line = remainder_from_last_buffer;
            line_offset.set(0);
            stockObject.fromText(read_line);
            output.collect(stockObject);
        }
        offset_of_last_eol = -1;
    }

    return offset_of_last_eol + 1;
}

From source file:com.tgam.hadoop.util.GenericEscapedLineReader.java

License:Apache License

/**
 * Read one line from the InputStream into the given Text.  A line
 * can be terminated by one of the following: '\n' (LF) , '\r' (CR),
 * or '\r\n' (CR+LF).  EOF also terminates an otherwise unterminated
 * line./*  w  w w.j  a v a  2  s  .co  m*/
 *
 * @param str the object to store the given line (without newline)
 * @param maxLineLength the maximum number of bytes to store into str;
 *  the rest of the line is silently discarded.
 * @param maxBytesToConsume the maximum number of bytes to consume
 *  in this call.  This is only a hint, because if the line cross
 *  this threshold, we allow it to happen.  It can overshoot
 *  potentially by as much as one buffer length.
 *
 * @return the number of bytes read including the (longest) newline
 * found.
 *
 * @throws IOException if the underlying stream throws
 */
public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    /* We're reading data from in, but the head of the stream may be
    * already buffered in buffer, so we have several cases:
    * 1. No newline characters are in the buffer, so we need to copy
    *    everything and read another buffer from the stream.
    * 2. An unambiguously terminated line is in buffer, so we just
    *    copy to str.
    * 3. Ambiguously terminated line is in buffer, i.e. buffer ends
    *    in CR.  In this case we copy everything up to CR to str, but
    *    we also need to see what follows CR: if it's LF, then we
    *    need consume LF as well, so next call to readLine will read
    *    from after that.
    * We use a flag prevCharCR to signal if previous character was CR
    * and, if it happens to be at the end of the buffer, delay
    * consuming it until we have a chance to look at the char that
    * follows.
    */
    str.clear();
    int txtLength = 0; //tracks str.getLength(), as an optimization
    int newlineLength = 0; //length of terminating newline
    boolean prevCharCR = false; //true of prev char was CR
    boolean prevCharEscape = false;
    long bytesConsumed = 0;
    do {
        int startPosn = bufferPosn; //starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            startPosn = bufferPosn = 0;
            if (prevCharCR)
                ++bytesConsumed; //account for CR from previous read
            bufferLength = in.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline
            if (buffer[bufferPosn] == LF && !prevCharEscape) {
                newlineLength = (prevCharCR) ? 2 : 1;
                ++bufferPosn; // at next invocation proceed from following byte
                break;
            }
            if (prevCharCR) { //CR + notLF, we are at notLF
                newlineLength = 1;
                break;
            }

            prevCharCR = (buffer[bufferPosn] == CR && !prevCharEscape);
            prevCharEscape = (buffer[bufferPosn] == ESCAPE);
        }
        int readLength = bufferPosn - startPosn;
        if (prevCharCR && newlineLength == 0)
            --readLength; //CR at the end of the buffer
        bytesConsumed += readLength;
        int appendLength = readLength - newlineLength;
        if (appendLength > maxLineLength - txtLength) {
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before newline: " + bytesConsumed);
    return (int) bytesConsumed;
}