Example usage for org.apache.hadoop.io Text getLength

List of usage examples for org.apache.hadoop.io Text getLength

Introduction

This page lists example usages of the getLength method of org.apache.hadoop.io.Text.

Prototype

@Override
public int getLength() 

Source Link

Document

Returns the number of bytes in the byte array

Usage

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Appends the decimal text representation of a long to the given text,
 * optionally followed by a single terminator character.
 *
 * @param i the value to serialize; every long value, including
 *          {@code Long.MIN_VALUE}, is handled
 * @param t the text the digits are appended to
 * @param toAppend a character written right after the number, or the NUL
 *          character to write nothing after it
 */
public static void serializeLong(long i, Text t, char toAppend) {
    final boolean negative = i < 0;

    // Calculate number of bytes needed to serialize the given long: one per
    // decimal digit, plus one for the sign and one for the terminator when
    // present. Integer division truncates toward zero, so the digits of a
    // negative value can be counted without negating it first.
    int bytes_needed = negative ? 1 : 0;
    long temp = i;
    do {
        bytes_needed++;
        temp /= 10;
    } while (temp != 0);

    if (toAppend != '\0')
        bytes_needed++;

    // Reserve the bytes needed in the text.
    // NOTE(review): ToAppend is declared elsewhere; presumably a scratch
    // array at least bytes_needed bytes long - confirm.
    t.append(ToAppend, 0, bytes_needed);
    // Extract the underlying buffer array and fill it directly
    byte[] buffer = t.getBytes();
    // Position of the next character to write; the number is written from
    // its last character backward
    int position = t.getLength() - 1;

    if (toAppend != '\0')
        buffer[position--] = (byte) toAppend;

    // The value is deliberately never negated: -Long.MIN_VALUE overflows
    // back to Long.MIN_VALUE, which would produce negative remainders and an
    // out-of-bounds digit index. Taking the absolute value of each remainder
    // (always within -9..9) is safe for every input.
    do {
        int digit = (int) Math.abs(i % 10);
        buffer[position--] = digits[digit];
        i /= 10;
    } while (i != 0);
    // Negative sign is prepended separately for negative numbers
    if (negative)
        buffer[position--] = '-';
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Parses the number at the beginning of the given text, then removes the
 * parsed characters (and the separator right after them, if present) from
 * the text.
 *
 * @param text the text to read from; it is modified so that it starts right
 *          after the consumed number and separator
 * @param separator the character that may follow the number
 * @return the value produced by {@code deserializeLong} for the leading
 *          run of decimal characters
 */
public static long consumeLong(Text text, char separator) {
    // The text is not modified until the final set(), so its length can be
    // read once
    final int length = text.getLength();
    byte[] bytes = text.getBytes();
    int i = 0;
    // Skip until the separator or end of text. Bytes >= 0x80 are negative in
    // Java and would throw ArrayIndexOutOfBoundsException when used as an
    // index into the lookup table, so they are treated as ending the number.
    while (i < length && bytes[i] >= 0 && DecimalChars[bytes[i]])
        i++;
    long l = deserializeLong(bytes, 0, i);
    // If the first char after the long is the separator, skip it
    if (i < length && bytes[i] == separator)
        i++;
    // Shift bytes after the long to the beginning of the backing array
    System.arraycopy(bytes, i, bytes, 0, length - i);
    text.set(bytes, 0, length - i);
    return l;
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Appends the decimal text representation of an int to the given text,
 * optionally followed by a single terminator character.
 *
 * @param i the value to serialize; every int value, including
 *          {@code Integer.MIN_VALUE}, is handled
 * @param t the text the digits are appended to
 * @param toAppend a character written right after the number, or the NUL
 *          character to write nothing after it
 */
public static void serializeInt(int i, Text t, char toAppend) {
    final boolean negative = i < 0;

    // Calculate number of bytes needed to serialize the given int: one per
    // decimal digit, plus one for the sign and one for the terminator when
    // present. Integer division truncates toward zero, so the digits of a
    // negative value can be counted without negating it first.
    int bytes_needed = negative ? 1 : 0;
    int temp = i;
    do {
        bytes_needed++;
        temp /= 10;
    } while (temp != 0);

    if (toAppend != '\0')
        bytes_needed++;

    // Reserve the bytes needed in the text.
    // NOTE(review): ToAppend is declared elsewhere; presumably a scratch
    // array at least bytes_needed bytes long - confirm.
    t.append(ToAppend, 0, bytes_needed);
    // Extract the underlying buffer array and fill it directly
    byte[] buffer = t.getBytes();
    // Position of the next character to write; the number is written from
    // its last character backward
    int position = t.getLength() - 1;

    if (toAppend != '\0')
        buffer[position--] = (byte) toAppend;

    // The value is deliberately never negated: -Integer.MIN_VALUE overflows
    // back to Integer.MIN_VALUE, which would produce negative remainders and
    // an out-of-bounds digit index. Taking the absolute value of each
    // remainder (always within -9..9) is safe for every input.
    do {
        int digit = Math.abs(i % 10);
        buffer[position--] = digits[digit];
        i /= 10;
    } while (i != 0);
    // Negative sign is prepended separately for negative numbers
    if (negative)
        buffer[position--] = '-';
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Parses the number at the beginning of the given text, then removes the
 * parsed characters (and the separator right after them, if present) from
 * the text.
 *
 * @param text the text to read from; it is modified so that it starts right
 *          after the consumed number and separator
 * @param separator the character that may follow the number
 * @return the value produced by {@code deserializeInt} for the leading run
 *          of decimal characters
 */
public static int consumeInt(Text text, char separator) {
    // The text is not modified until the final set(), so its length can be
    // read once
    final int length = text.getLength();
    byte[] bytes = text.getBytes();
    int i = 0;
    // Skip until the separator or end of text. Bytes >= 0x80 are negative in
    // Java and would throw ArrayIndexOutOfBoundsException when used as an
    // index into the lookup table, so they are treated as ending the number.
    while (i < length && bytes[i] >= 0 && DecimalChars[bytes[i]])
        i++;
    int l = deserializeInt(bytes, 0, i);
    // If the first char after the int is the separator, skip it
    if (i < length && bytes[i] == separator)
        i++;
    // Shift bytes after the int to the beginning of the backing array
    System.arraycopy(bytes, i, bytes, 0, length - i);
    text.set(bytes, 0, length - i);
    return l;
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Parses a serialized map from the beginning of the given text into
 * {@code tags} and trims the parsed part off the text.
 *
 * The map is cleared first. Nothing else happens when the text is empty or
 * does not begin with the map-start separator.
 *
 * @param text the text to parse; on a successful parse it is reset to the
 *          bytes starting where parsing stopped
 * @param tags receives the parsed key/value pairs
 */
public static void consumeMap(Text text, Map<String, String> tags) {
    tags.clear();

    final int length = text.getLength();
    if (length <= 0) {
        return;
    }

    final byte[] raw = text.getBytes();
    if (raw[0] != Separators[MapStart]) {
        return;
    }

    int cursor = 1;
    while (cursor < length && raw[cursor] != Separators[MapEnd]) {
        // The key runs up to the key/value separator; scanning starts one
        // byte after the key's first byte
        int keyEnd = cursor + 1;
        while (keyEnd < length && raw[keyEnd] != Separators[KeyValueSeparator]) {
            keyEnd++;
        }
        final String key = new String(raw, cursor, keyEnd - cursor);

        // The value runs up to the next field separator or the map end
        final int valueStart = keyEnd + 1;
        int valueEnd = valueStart + 1;
        while (valueEnd < length) {
            final byte current = raw[valueEnd];
            if (current == Separators[FieldSeparator] || current == Separators[MapEnd]) {
                break;
            }
            valueEnd++;
        }
        tags.put(key, new String(raw, valueStart, valueEnd - valueStart));

        // Step over the field separator, if that is what ended the value
        cursor = valueEnd;
        if (cursor < length && raw[cursor] == Separators[FieldSeparator]) {
            cursor++;
        }
    }

    // Keep only the bytes from the position where parsing stopped
    text.set(raw, cursor, length - cursor);
}

From source file:com.ricemap.spateDB.mapred.SpatialRecordReader.java

License:Apache License

/**
 * Reads the next line from input and returns true if a line was read.
 * If no more lines are available in this split, false is returned.
 * Bytes held in the internal buffer are consumed before the stream is read.
 *
 * @param value the text that receives the line; it is cleared before each
 *          read attempt
 * @return true if a line was read into {@code value}, false when the end of
 *          the split or of the stream has been reached
 * @throws IOException if reading from the underlying stream fails
 */
protected boolean nextLine(Text value) throws IOException {
    if (blockType == BlockType.RTREE && pos == 8) {
        // File is positioned at the RTree header
        // Skip the header and go to first data object in file
        pos += RTree.skipHeader(in);
        LOG.info("Skipped R-tree to position: " + pos);
        // Reinitialize record reader at the new position
        lineReader = new LineReader(in);
    }
    while (getFilePosition() <= end) {
        value.clear();
        int b = 0;
        if (buffer != null) {
            // Read the first line encountered in buffer
            int eol = RTree.skipToEOL(buffer, 0);
            b += eol;
            value.append(buffer, 0, eol);
            if (eol < buffer.length) {
                // There are still some bytes remaining in buffer; keep only
                // those so the line just consumed is not returned again
                byte[] tmp = new byte[buffer.length - eol];
                System.arraycopy(buffer, eol, tmp, 0, tmp.length);
                buffer = tmp;
            } else {
                buffer = null;
            }
            // Check if a complete line has been read from the buffer. The
            // length guard avoids indexing position -1 when nothing was
            // taken from the buffer.
            if (value.getLength() > 0) {
                byte last_byte = value.getBytes()[value.getLength() - 1];
                if (last_byte == '\n' || last_byte == '\r')
                    return true;
            }
        }

        // Read the first line from stream
        Text temp = new Text();
        b += lineReader.readLine(temp);
        if (b == 0) {
            // Indicates an end of stream
            return false;
        }
        pos += b;

        // Append the part read from stream to the part extracted from buffer
        value.append(temp.getBytes(), 0, temp.getLength());

        if (value.getLength() > 1) {
            // Read a non-empty line. Note that end-of-line character is included
            return true;
        }
    }
    // Reached end of file
    return false;
}

From source file:com.ricemap.spateDB.operations.RecordCount.java

License:Apache License

/**
 * Counts the exact number of lines in a file by opening the file and
 * reading it line by line/*  ww w .  j  a  va 2 s  . com*/
 * @param fs
 * @param file
 * @return
 * @throws IOException
 */
public static long recordCountLocal(FileSystem fs, Path file) throws IOException {
    LineReader lineReader = new LineReader(fs.open(file));
    Text line = new Text();
    long lineCount = 0;

    while (lineReader.readLine(line) > 0) {
        if (line.getLength() > 0)
            lineCount++;
    }
    lineReader.close();
    return lineCount;
}

From source file:com.ricemap.spateDB.operations.Tail.java

License:Apache License

/**
 * Reads a maximum of n lines from the stream starting from its current
 * position and going backward./*from ww  w . j  a  va  2  s  .  c om*/
 * 
 * @param in - An input stream. It'll be scanned from its current position
 *   backward till position 0
 * @param n - Maximum number of lines to return
 * @param stockObject - An object used to deserialize lines read. It can
 *   be set to <code>null</code> if output is also <code>null</code>. In this
 *   case, nothing is reported to the output.
 * @param output - An output collector used to report lines read.
 * @return - The position of the beginning of the earliest line read from
 *   buffer.
 * @throws IOException
 */
public static <T extends TextSerializable> long tail(FSDataInputStream in, int n, T stockObject,
        ResultCollector<T> output) throws IOException {
    int lines_read = 0;
    long end = in.getPos();
    long offset_of_last_eol = end;
    long last_read_byte = end;

    LongWritable line_offset = new LongWritable();
    Text read_line = new Text();
    Text remainder_from_last_buffer = new Text();
    byte[] buffer = new byte[4096];

    while (last_read_byte > 0 && lines_read < n) {
        // Read next chunk from the back
        long first_byte_to_read = (last_read_byte - 1) - (last_read_byte - 1) % buffer.length;
        in.seek(first_byte_to_read);
        int bytes_to_read = (int) (last_read_byte - first_byte_to_read);
        in.read(buffer, 0, bytes_to_read);
        last_read_byte = first_byte_to_read;

        // Iterate over bytes in this buffer
        int i_last_byte_consumed_in_buffer = bytes_to_read;
        int i_last_byte_examined_in_buffer = bytes_to_read;
        while (i_last_byte_examined_in_buffer > 0 && lines_read < n) {
            byte byte_examined = buffer[--i_last_byte_examined_in_buffer];
            if (byte_examined == '\n' || byte_examined == '\r') {
                // Found an end of line character
                // Report this to output unless it's empty
                long offset_of_this_eol = first_byte_to_read + i_last_byte_examined_in_buffer;
                if (offset_of_last_eol - offset_of_this_eol > 1) {
                    if (output != null) {
                        read_line.clear();
                        // +1 is to skip the EOL at the beginning
                        read_line.append(buffer, i_last_byte_examined_in_buffer + 1,
                                i_last_byte_consumed_in_buffer - (i_last_byte_examined_in_buffer + 1));
                        // Also append bytes remaining from last buffer
                        if (remainder_from_last_buffer.getLength() > 0) {
                            read_line.append(remainder_from_last_buffer.getBytes(), 0,
                                    remainder_from_last_buffer.getLength());
                        }
                        line_offset.set(offset_of_this_eol + 1);
                        stockObject.fromText(read_line);
                        output.collect(stockObject);
                    }
                    lines_read++;
                    remainder_from_last_buffer.clear();
                }
                i_last_byte_consumed_in_buffer = i_last_byte_examined_in_buffer;
                offset_of_last_eol = offset_of_this_eol;
            }
        }
        if (i_last_byte_consumed_in_buffer > 0) {
            // There are still some bytes not consumed in buffer
            if (remainder_from_last_buffer.getLength() == 0) {
                // Store whatever is remaining in remainder_from_last_buffer
                remainder_from_last_buffer.append(buffer, 0, i_last_byte_consumed_in_buffer);
            } else {
                // Prepend remaining bytes to Text
                Text t = new Text();
                t.append(buffer, 0, i_last_byte_consumed_in_buffer);
                t.append(remainder_from_last_buffer.getBytes(), 0, remainder_from_last_buffer.getLength());
                remainder_from_last_buffer = t;
            }
        }
    }

    if (lines_read < n && remainder_from_last_buffer.getLength() > 0) {
        // There is still one last line needs to be reported
        lines_read++;
        if (output != null) {
            read_line = remainder_from_last_buffer;
            line_offset.set(0);
            stockObject.fromText(read_line);
            output.collect(stockObject);
        }
        offset_of_last_eol = -1;
    }

    return offset_of_last_eol + 1;
}

From source file:com.ricemap.spateDB.util.CommandLineArguments.java

License:Apache License

/**
 * Creates a stock shape instance for the shape type given in the "shape"
 * parameter, optionally inferring the type from the input file.
 *
 * @param autodetect - Automatically detect shape type from input file
 *   if shape is not explicitly set by user
 * @return a new shape instance, or <code>null</code> if the shape type
 *   could not be resolved (a warning is logged in that case)
 */
public Shape getShape(boolean autodetect) {
    String shapeTypeStr = get("shape");
    final Text shapeType = new Text();
    if (shapeTypeStr != null) {
        // Locale.ROOT keeps the lowercase form stable across default
        // locales (e.g. under a Turkish locale "POINT" would not become
        // "point")
        shapeType.set(shapeTypeStr.toLowerCase(java.util.Locale.ROOT).getBytes());
    }

    if (autodetect && shapeType.getLength() == 0 && getPath() != null) {
        // Shape type not found in parameters. Try to infer from a line in input
        // file
        Path in_file = getPath();
        try {
            Sampler.sampleLocal(in_file.getFileSystem(new Configuration()), in_file, 1, 0,
                    new ResultCollector<Text2>() {
                        @Override
                        public void collect(Text2 value) {
                            // Guess the shape type from the number of
                            // comma-separated fields in the sampled line
                            String val = value.toString();
                            String[] parts = val.split(",");
                            if (parts.length == 2) {
                                shapeType.set("point".getBytes());
                            } else if (parts.length == 4) {
                                shapeType.set("rect".getBytes());
                            } else if (parts.length > 4) {
                                shapeType.set("tiger".getBytes());
                            }
                        }
                    }, new Text2(), new Text2());
        } catch (IOException e) {
            // Auto-detection is best effort; report the failure through the
            // logger and continue with an unknown shape type
            LOG.warn("Could not auto-detect shape type from " + in_file, e);
        }
    }

    // NOTE(review): an auto-detected "tiger" type matches none of the
    // branches below, so it ends in the "unknown shape type" warning -
    // confirm whether that is intended.
    Shape stockShape = null;
    if (shapeType.toString().startsWith("rect")) {
        stockShape = new Prism();
    } else if (shapeType.toString().startsWith("point")) {
        stockShape = new Point3d();
    } else if (shapeType.toString().startsWith("datapoint")) {
        stockShape = new DataPoint();
    } else if (shapeTypeStr != null) {
        // Use the shapeType as a class name and try to instantiate it dynamically
        try {
            Class<? extends Shape> shapeClass = Class.forName(shapeTypeStr).asSubclass(Shape.class);
            stockShape = shapeClass.newInstance();
        } catch (ClassNotFoundException ignored) {
            // Not a loadable class name; reported by the warning below
        } catch (InstantiationException ignored) {
            // Class cannot be instantiated; reported by the warning below
        } catch (IllegalAccessException ignored) {
            // Constructor not accessible; reported by the warning below
        }
    }
    if (stockShape == null)
        LOG.warn("unknown shape type: " + shapeTypeStr);

    return stockShape;
}

From source file:com.rramos.bigdata.utils.GenericUDFSha2.java

License:Apache License

/**
 * Hashes the first argument with the configured digest and returns the
 * result as a hexadecimal string stored in the reusable output object.
 * Returns null when no digest is configured or when the argument value is
 * null.
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (digest == null) {
        return null;
    }
    digest.reset();

    final byte[] payload;
    final int payloadLength;
    if (isStr) {
        final Text textValue = GenericUDFParamUtils.getTextValue(arguments, 0, converters);
        if (textValue == null) {
            return null;
        }
        payload = textValue.getBytes();
        payloadLength = textValue.getLength();
    } else {
        final BytesWritable binaryValue = GenericUDFParamUtils.getBinaryValue(arguments, 0, converters);
        if (binaryValue == null) {
            return null;
        }
        payload = binaryValue.getBytes();
        payloadLength = binaryValue.getLength();
    }

    // Only the first payloadLength bytes of the backing array are hashed
    digest.update(payload, 0, payloadLength);

    output.set(Hex.encodeHexString(digest.digest()));
    return output;
}