Example usage for org.apache.hadoop.io Text getLength

List of usage examples for org.apache.hadoop.io Text getLength

Introduction

In this page you can find the example usage for org.apache.hadoop.io Text getLength.

Prototype

@Override
public int getLength() 

Source Link

Document

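Returns the number of valid bytes in the backing byte array, i.e. the length of the UTF-8 encoded text in bytes (not the number of characters, and not the capacity of the array returned by getBytes()).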

Usage

From source file:com.naltel.hadoop.hive.genericUDF.examples.GenericUDFInstr.java

License:Apache License

/**
 * Evaluates an INSTR-style lookup: returns the 1-based position of the
 * {@code nth} occurrence of the second argument inside the first argument,
 * starting the search at {@code startIndex}, or 0 when there is no such
 * occurrence.
 *
 * <p>Arguments 3 (start index) and 4 (occurrence number) are optional and
 * fall back to {@code DEFAULT_START_INDEX} and {@code DEFAULT_NTH}.
 *
 * @param arguments the deferred call arguments: text, subtext, and optionally
 *                  start index and occurrence number
 * @return {@code null} if the text or the subtext is null, otherwise the
 *         shared {@code intWritable} holding the position (0 = not found or
 *         start index out of range)
 * @throws HiveException if an argument cannot be obtained
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    // A null text or a null pattern produces a null result.
    if (arguments[0].get() == null || arguments[1].get() == null) {
        return null;
    }

    final Text text = (Text) converters[0].convert(arguments[0].get());
    final Text subtext = (Text) converters[1].convert(arguments[1].get());

    int startIndex = DEFAULT_START_INDEX;
    if (arguments.length >= 3) {
        startIndex = ((IntWritable) converters[2].convert(arguments[2].get())).get();
    }

    int nth = DEFAULT_NTH;
    if (arguments.length == 4) {
        nth = ((IntWritable) converters[3].convert(arguments[3].get())).get();
    }

    // Text.getLength() is a byte count, so all offsets below are byte offsets.
    final int textLength = text.getLength();

    // A negative start index is interpreted relative to the end of the text.
    if (startIndex < 0) {
        startIndex += textLength;
    }

    // Out-of-range start positions can never match.
    if (startIndex <= 0 || startIndex > textLength) {
        intWritable.set(0);
        return intWritable;
    }

    int matchPosition = 0;
    int searchFrom = startIndex;
    for (int remaining = nth; remaining > 0; remaining--) {
        // findText works with 0-based offsets; positions reported here are 1-based.
        matchPosition = GenericUDFUtils.findText(text, subtext, searchFrom - 1) + 1;
        if (matchPosition == 0) {
            // Fewer than nth occurrences: report "not found".
            break;
        }
        searchFrom = matchPosition + 1;
    }

    intWritable.set(matchPosition);
    return intWritable;
}

From source file:com.pagerankcalculator.calculation.PageRankCalculationMapper.java

/**
 * Splits one input record at its first two tab characters into user ID,
 * page rank and a delimited list of following IDs, then emits:
 * <ul>
 *   <li>for every following ID: {@code (followingID, pageRank + "\t" + count)},
 *       where {@code count} is the number of following IDs;</li>
 *   <li>once: {@code (userID, FOLLOWING_LIST_TAG + followingList)}.</li>
 * </ul>
 *
 * @param key     input key (not used)
 * @param value   the input record
 * @param context the job context used to emit the output pairs
 * @throws IOException          if decoding or writing fails
 * @throws InterruptedException if writing is interrupted
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    final int tabIdx1 = value.find("\t");
    final int tabIdx2 = value.find("\t", tabIdx1 + 1);

    // Fetch the backing array once; the record itself ends at getLength().
    final byte[] bytes = value.getBytes();
    final String userId = Text.decode(bytes, 0, tabIdx1);
    final String pageRank = Text.decode(bytes, tabIdx1 + 1, tabIdx2 - (tabIdx1 + 1));
    final String csvFollowingIds = Text.decode(bytes, tabIdx2 + 1, value.getLength() - (tabIdx2 + 1));

    final String[] followingIds = csvFollowingIds.split(TwitterPageRank.FOLLOWING_LIST_DELIMETER);

    // The emitted value is the same for every following ID, so build it once
    // instead of on every iteration.
    final String pageRankWithTotalFollowing = pageRank + "\t" + followingIds.length;
    for (String followingId : followingIds) {
        context.write(new Text(followingId), new Text(pageRankWithTotalFollowing));
    }

    // Re-emit the following list, tagged so it can be told apart from the
    // per-following-ID records above.
    context.write(new Text(userId), new Text(TwitterPageRank.FOLLOWING_LIST_TAG + csvFollowingIds));
}

From source file:com.pagerankcalculator.graphparsing.GraphParsingMapper.java

/**
 * Splits the record at its first tab: the part before the tab is stored in
 * {@code userID}, the rest in {@code followerID}, and the pair is emitted
 * with the follower as key and the user as value.
 *
 * @param key     input key (not used)
 * @param value   the input record
 * @param context the job context used to emit the output pair
 * @throws IOException          if decoding or writing fails
 * @throws InterruptedException if writing is interrupted
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    final int separatorAt = value.find("\t");
    final int followerStart = separatorAt + 1;
    final byte[] raw = value.getBytes();

    userID = Text.decode(raw, 0, separatorAt);
    followerID = Text.decode(raw, followerStart, value.getLength() - followerStart);

    // Emit the pair reversed: follower first, user second.
    context.write(new Text(followerID), new Text(userID));
}

From source file:com.qq.pig.udf.CustomJsonLoader.java

License:Apache License

/**
 * Parses one input line as a JSON object into a tuple laid out according to
 * {@code schema}.
 *
 * <p>Throughout this file, if the parsing isn't what we expect we return a
 * tuple with null fields rather than throwing an exception. That way a few
 * mangled lines don't fail the job.
 *
 * @param val the raw input line
 * @return a tuple with one slot per schema field; the slots are left unset
 *         when the line does not start with a JSON object
 * @throws IOException if the JSON parser fails while reading
 */
public Tuple parseTuple(Text val) throws IOException {
    // Create a parser specific for this input line.  This may not be the
    // most efficient approach.

    // Read straight from the Text's backing array, bounded to its valid
    // length. The stream never writes to the array and is fully consumed
    // before this method returns, so a defensive copy is not needed.
    ByteArrayInputStream bais = new ByteArrayInputStream(val.getBytes(), 0, val.getLength());
    JsonParser p = jsonFactory.createJsonParser(bais);
    try {
        // Create the tuple we will be returning.  We create it with the right
        // number of fields, as the Tuple object is optimized for this case.
        ResourceFieldSchema[] fields = schema.getFields();
        Tuple t = tupleFactory.newTuple(fields.length);

        // Read the start object marker.
        if (p.nextToken() != JsonToken.START_OBJECT) {
            warn("Bad record, could not find start of record " + val.toString(), PigWarning.UDF_WARNING_1);
            return t;
        }
        readFields(p, t);
        return t;
    } finally {
        // Release the parser on every path, including the bad-record return
        // and failures inside readFields.
        p.close();
    }
}

From source file:com.ricemap.spateDB.core.GridInfo.java

License:Apache License

/**
 * Reads this object's fields from {@code text}: first the superclass fields,
 * then, if anything is left, a leading comma followed by
 * {@code layers,columns,rows}.
 *
 * @param text the text to read from; the parsed portion is consumed
 */
@Override
public void fromText(Text text) {
    super.fromText(text);
    final int length = text.getLength();
    if (length > 0) {
        // Remove the first comma: shift the remaining bytes one position to
        // the left AND shrink the text by one byte. Without shrinking, the
        // last byte would remain duplicated at the tail and could be parsed
        // as an extra trailing digit of the final field.
        final byte[] bytes = text.getBytes();
        System.arraycopy(bytes, 1, bytes, 0, length - 1);
        text.set(bytes, 0, length - 1);

        layers = (int) TextSerializerHelper.consumeInt(text, ',');
        columns = (int) TextSerializerHelper.consumeInt(text, ',');
        rows = (int) TextSerializerHelper.consumeInt(text, '\0');
    }
}

From source file:com.ricemap.spateDB.core.GridRecordWriter.java

License:Apache License

/**
 * Close the given cell freeing all memory reserved by it.
 * Once a cell is closed, we should not write more data to it.
 * @param cellInfo/*  w  ww .ja  v a 2  s.  c om*/
 * @throws IOException
 */
protected void closeCellBackground(final Path intermediateCellPath, final Path finalCellPath,
        final OutputStream intermediateCellStream, final OutputStream masterFile, final Prism cellMbr)
        throws IOException {

    Thread closingThread = new Thread() {
        @Override
        public void run() {
            try {
                Path finalfinalCellPath = flushAllEntries(intermediateCellPath, intermediateCellStream,
                        finalCellPath);
                // Write an entry to the master file

                // Write a line to the master file including file name and cellInfo
                if (masterFile != null) {
                    Partition partition = new Partition(finalfinalCellPath.getName(), cellMbr);
                    Text line = partition.toText(new Text());
                    masterFile.write(line.getBytes(), 0, line.getLength());
                    masterFile.write(NEW_LINE);
                }
            } catch (IOException e) {
                throw new RuntimeException("Error closing thread", e);
            }
        }
    };

    closingThreads.add(closingThread);
    // Remove previously terminated threads
    while (!closingThreads.isEmpty() && closingThreads.get(0).getState() == Thread.State.TERMINATED) {
        closingThreads.remove(0);
    }
    // Start first thread (if exists)
    if (!closingThreads.isEmpty() && closingThreads.get(0).getState() == Thread.State.NEW)
        closingThreads.get(0).start();
}

From source file:com.ricemap.spateDB.core.Partition.java

License:Apache License

/**
 * Reads this partition from {@code text}: first the superclass fields, then
 * the file name, which is everything after the separating comma.
 *
 * @param text the text to read from
 */
@Override
public void fromText(Text text) {
    super.fromText(text);
    // Skip the comma and read filename. Text stores UTF-8 encoded bytes, so
    // decode explicitly as UTF-8 instead of relying on the platform default
    // charset, which would corrupt non-ASCII names on some systems.
    filename = new String(text.getBytes(), 1, text.getLength() - 1,
            java.nio.charset.Charset.forName("UTF-8"));
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Appends the base-16 representation of the given number to the given text,
 * one output byte per 4-bit group taken from the {@code digits} table.
 * Negative numbers are written as a {@code '-'} followed by the magnitude.
 * If {@code toAppend} is not {@code '\0'}, it is written right after the
 * number.
 *
 * @param i        the number to serialize
 * @param t        the text to append to
 * @param toAppend a character to append after the number, or {@code '\0'}
 *                 to append nothing
 */
public static void serializeHexLong(long i, Text t, char toAppend) {
    final boolean negative = i < 0;
    final boolean hasTerminator = toAppend != '\0';
    long magnitude = negative ? -i : i;

    // Count the 4-bit groups needed; zero still takes one.
    int bytesNeeded = 0;
    long remaining = magnitude;
    do {
        bytesNeeded++;
        remaining >>>= 4;
    } while (remaining != 0);

    if (negative) {
        bytesNeeded++; // room for the sign
    }
    if (hasTerminator) {
        bytesNeeded++; // room for the trailing character
    }

    // Grow the text by the required number of bytes, then overwrite those
    // bytes in place, working from the last position backwards.
    t.append(ToAppend, 0, bytesNeeded);
    final byte[] buffer = t.getBytes();
    int position = t.getLength() - 1;

    if (hasTerminator) {
        buffer[position--] = (byte) toAppend;
    }

    // Least significant group first, since the buffer is filled right to left.
    do {
        buffer[position--] = digits[(int) (magnitude & 0xFL)];
        magnitude >>>= 4;
    } while (magnitude != 0);

    if (negative) {
        buffer[position] = '-';
    }
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Deserializes and consumes a long from the given text. Consuming means all
 * characters read for deserialization are removed from the given text.
 * If separator is non-zero, a long is read and consumed up to the first
 * occurrence of this separator. The separator is also consumed.
 * @param text/*from   w w w.jav a2 s.  c om*/
 * @param separator
 * @return
 */
public static long consumeHexLong(Text text, char separator) {
    int i = 0;
    byte[] bytes = text.getBytes();
    // Skip until the separator or end of text
    while (i < text.getLength() && HexadecimalChars[bytes[i]])
        i++;
    long l = deserializeHexLong(bytes, 0, i);
    // If the first char after the long is the separator, skip it
    if (i < text.getLength() && bytes[i] == separator)
        i++;
    // Shift bytes after the long
    System.arraycopy(bytes, i, bytes, 0, text.getLength() - i);
    text.set(bytes, 0, text.getLength() - i);
    return l;
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Deserializes and consumes a double from the given text. Consuming means
 * all characters read for deserialization are removed from the given text.
 * The number extends over the leading run of digits, {@code 'e'},
 * {@code 'E'}, {@code '-'}, {@code '+'} and {@code '.'}. If the byte right
 * after the number equals {@code separator}, it is consumed as well.
 *
 * @param text      the text to read from; the consumed prefix is removed
 * @param separator the separator expected after the number
 * @return the value parsed from the consumed prefix
 */
public static double consumeDouble(Text text, char separator) {
    final byte[] bytes = text.getBytes();
    final int length = text.getLength();

    // Find the end of the leading run of number characters.
    int end = 0;
    for (; end < length; end++) {
        final byte b = bytes[end];
        final boolean digit = b >= '0' && b <= '9';
        final boolean exponent = b == 'e' || b == 'E';
        final boolean signOrPoint = b == '-' || b == '+' || b == '.';
        if (!(digit || exponent || signOrPoint)) {
            break;
        }
    }
    final double value = deserializeDouble(bytes, 0, end);

    // Swallow the separator when it directly follows the number.
    int consumed = end;
    if (consumed < length && bytes[consumed] == separator) {
        consumed++;
    }

    // Move the unread tail to the front and shrink the text to match.
    final int remaining = length - consumed;
    System.arraycopy(bytes, consumed, bytes, 0, remaining);
    text.set(bytes, 0, remaining);
    return value;
}