Example usage for org.apache.hadoop.io Text getLength

List of usage examples for org.apache.hadoop.io Text getLength

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.getLength().

Prototype

@Override
public int getLength() 

Source Link

Document

Returns the number of bytes in the byte array

Usage

From source file:com.ebay.nest.io.sede.lazy.LazySimpleSerDe.java

License:Apache License

/**
 * Serialize the object into the output byte stream.
 *
 * A null object (or a null list/map/struct/union payload) is written as the
 * bytes of {@code nullSequence}. Nested values are written recursively, with
 * the separator for each nesting level obtained from
 * {@code LazyUtils.getSeparator(separators, level)}.
 *
 * @param out
 *          The byte stream to store the serialized data.
 * @param obj
 *          The object for the current field.
 * @param objInspector
 *          The ObjectInspector for the current Object.
 * @param separators
 *          The separators array.
 * @param level
 *          The current level of separator.
 * @param nullSequence
 *          The byte sequence representing the NULL value.
 * @param escaped
 *          Whether we need to escape the data when writing out
 * @param escapeChar
 *          Which char to use as the escape char, e.g. '\\'
 * @param needsEscape
 *          Which chars needs to be escaped. This array should have size of
 *          128. Negative byte values (or byte values >= 128) are never
 *          escaped.
 * @throws IOException
 *           if writing to {@code out} fails
 * @throws SerDeException
 *           part of the declared contract; propagated from nested calls
 * @throws RuntimeException
 *           if the inspector reports a category not handled here
 */
public static void serialize(ByteStream.Output out, Object obj, ObjectInspector objInspector, byte[] separators,
        int level, Text nullSequence, boolean escaped, byte escapeChar, boolean[] needsEscape)
        throws IOException, SerDeException {

    if (obj == null) {
        // Only the first getLength() bytes of Text.getBytes() are valid data.
        out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        return;
    }

    char separator;
    List<?> list;
    switch (objInspector.getCategory()) {
    case PRIMITIVE:
        LazyUtils.writePrimitiveUTF8(out, obj, (PrimitiveObjectInspector) objInspector, escaped, escapeChar,
                needsEscape);
        return;
    case LIST:
        separator = (char) LazyUtils.getSeparator(separators, level);
        ListObjectInspector loi = (ListObjectInspector) objInspector;
        list = loi.getList(obj);
        ObjectInspector eoi = loi.getListElementObjectInspector();
        if (list == null) {
            out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        } else {
            for (int i = 0; i < list.size(); i++) {
                if (i > 0) {
                    out.write(separator);
                }
                serialize(out, list.get(i), eoi, separators, level + 1, nullSequence, escaped, escapeChar,
                        needsEscape);
            }
        }
        return;
    case MAP:
        // Entries are joined with the separator of this level; key and value
        // within an entry are joined with the separator of the next level.
        separator = (char) LazyUtils.getSeparator(separators, level);
        char keyValueSeparator = (char) LazyUtils.getSeparator(separators, level + 1);

        MapObjectInspector moi = (MapObjectInspector) objInspector;
        ObjectInspector koi = moi.getMapKeyObjectInspector();
        ObjectInspector voi = moi.getMapValueObjectInspector();
        Map<?, ?> map = moi.getMap(obj);
        if (map == null) {
            out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        } else {
            boolean first = true;
            for (Map.Entry<?, ?> entry : map.entrySet()) {
                if (first) {
                    first = false;
                } else {
                    out.write(separator);
                }
                // Two separator levels were consumed above, so nested data starts at level + 2.
                serialize(out, entry.getKey(), koi, separators, level + 2, nullSequence, escaped, escapeChar,
                        needsEscape);
                out.write(keyValueSeparator);
                serialize(out, entry.getValue(), voi, separators, level + 2, nullSequence, escaped, escapeChar,
                        needsEscape);
            }
        }
        return;
    case STRUCT:
        separator = (char) LazyUtils.getSeparator(separators, level);
        StructObjectInspector soi = (StructObjectInspector) objInspector;
        List<? extends StructField> fields = soi.getAllStructFieldRefs();
        list = soi.getStructFieldsDataAsList(obj);
        if (list == null) {
            out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        } else {
            for (int i = 0; i < list.size(); i++) {
                if (i > 0) {
                    out.write(separator);
                }
                serialize(out, list.get(i), fields.get(i).getFieldObjectInspector(), separators, level + 1,
                        nullSequence, escaped, escapeChar, needsEscape);
            }
        }
        return;
    case UNION:
        separator = (char) LazyUtils.getSeparator(separators, level);
        UnionObjectInspector uoi = (UnionObjectInspector) objInspector;
        List<? extends ObjectInspector> ois = uoi.getObjectInspectors();
        if (ois == null) {
            out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        } else {
            // Read the tag once: it is written out first and then selects the
            // inspector used for the union's field.
            byte tag = uoi.getTag(obj);
            // Byte.valueOf replaces the deprecated Byte constructor.
            LazyUtils.writePrimitiveUTF8(out, Byte.valueOf(tag),
                    PrimitiveObjectInspectorFactory.javaByteObjectInspector, escaped, escapeChar, needsEscape);
            out.write(separator);
            serialize(out, uoi.getField(obj), ois.get(tag), separators, level + 1, nullSequence,
                    escaped, escapeChar, needsEscape);
        }
        return;
    default:
        break;
    }

    throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
}

From source file:com.ebay.nest.io.sede.lazy.LazyStruct.java

License:Apache License

/**
 * Get the field out of the row without checking parsed. This is called by
 * both getField and getFieldsAsList./*from  w w w  . ja  va 2s . c  o  m*/
 *
 * @param fieldID
 *          The id of the field starting from 0.
 * @param nullSequence
 *          The sequence representing NULL value.
 * @return The value of the field
 */
private Object uncheckedGetField(int fieldID) {
    Text nullSequence = oi.getNullSequence();
    // Test the length first so in most cases we avoid doing a byte[]
    // comparison.
    int fieldByteBegin = startPosition[fieldID];
    int fieldLength = startPosition[fieldID + 1] - startPosition[fieldID] - 1;
    if ((fieldLength < 0) || (fieldLength == nullSequence.getLength() && LazyUtils.compare(bytes.getData(),
            fieldByteBegin, fieldLength, nullSequence.getBytes(), 0, nullSequence.getLength()) == 0)) {
        return null;
    }
    if (!fieldInited[fieldID]) {
        fieldInited[fieldID] = true;
        fields[fieldID].init(bytes, fieldByteBegin, fieldLength);
    }
    return fields[fieldID].getObject();
}

From source file:com.ebay.nest.io.sede.lazy.LazyUnion.java

License:Apache License

/**
 * Get the field out of the row without checking parsed.
 *
 * @return The value of the field/*  w ww.  ja va  2  s . c o m*/
 */
private Object uncheckedGetField() {
    Text nullSequence = oi.getNullSequence();
    int fieldLength = start + length - startPosition;
    if (fieldLength != 0 && fieldLength == nullSequence.getLength() && LazyUtils.compare(bytes.getData(),
            startPosition, fieldLength, nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) {
        return null;
    }

    if (!fieldInited) {
        fieldInited = true;
        field.init(bytes, startPosition, fieldLength);
    }
    return field.getObject();
}

From source file:com.ebay.nest.io.sede.lazy.LazyUtils.java

License:Apache License

/**
 * Write out the text representation of a Primitive Object to a UTF8 byte
 * stream./*  w  w w.  ja  v a  2  s.  c  o  m*/
 *
 * @param out
 *          The UTF8 byte OutputStream
 * @param o
 *          The primitive Object
 * @param needsEscape
 *          Whether a character needs escaping. This array should have size of
 *          128.
 */
public static void writePrimitiveUTF8(OutputStream out, Object o, PrimitiveObjectInspector oi, boolean escaped,
        byte escapeChar, boolean[] needsEscape) throws IOException {

    switch (oi.getPrimitiveCategory()) {
    case BOOLEAN: {
        boolean b = ((BooleanObjectInspector) oi).get(o);
        if (b) {
            out.write(trueBytes, 0, trueBytes.length);
        } else {
            out.write(falseBytes, 0, falseBytes.length);
        }
        break;
    }
    case BYTE: {
        LazyInteger.writeUTF8(out, ((ByteObjectInspector) oi).get(o));
        break;
    }
    case SHORT: {
        LazyInteger.writeUTF8(out, ((ShortObjectInspector) oi).get(o));
        break;
    }
    case INT: {
        LazyInteger.writeUTF8(out, ((IntObjectInspector) oi).get(o));
        break;
    }
    case LONG: {
        LazyLong.writeUTF8(out, ((LongObjectInspector) oi).get(o));
        break;
    }
    case FLOAT: {
        float f = ((FloatObjectInspector) oi).get(o);
        ByteBuffer b = Text.encode(String.valueOf(f));
        out.write(b.array(), 0, b.limit());
        break;
    }
    case DOUBLE: {
        double d = ((DoubleObjectInspector) oi).get(o);
        ByteBuffer b = Text.encode(String.valueOf(d));
        out.write(b.array(), 0, b.limit());
        break;
    }
    case STRING: {
        Text t = ((StringObjectInspector) oi).getPrimitiveWritableObject(o);
        writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape);
        break;
    }

    case VARCHAR: {
        HiveVarcharWritable hc = ((HiveVarcharObjectInspector) oi).getPrimitiveWritableObject(o);
        Text t = hc.getTextValue();
        writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape);
        break;
    }
    case BINARY: {
        BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
        byte[] toEncode = new byte[bw.getLength()];
        System.arraycopy(bw.getBytes(), 0, toEncode, 0, bw.getLength());
        byte[] toWrite = Base64.encodeBase64(toEncode);
        out.write(toWrite, 0, toWrite.length);
        break;
    }
    case DATE: {
        LazyDate.writeUTF8(out, ((DateObjectInspector) oi).getPrimitiveWritableObject(o));
        break;
    }
    case TIMESTAMP: {
        LazyTimestamp.writeUTF8(out, ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o));
        break;
    }
    case DECIMAL: {
        HiveDecimal bd = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
        ByteBuffer b = Text.encode(bd.toString());
        out.write(b.array(), 0, b.limit());
        break;
    }
    default: {
        throw new RuntimeException("Hive internal error.");
    }
    }
}

From source file:com.facebook.presto.accumulo.AccumuloClient.java

License:Apache License

/**
 * Gets the TabletServer hostname for where the given key is located in the given table
 *
 * @param table Fully-qualified table name
 * @param key Key to locate, or null to get the location of the first tablet
 * @return The tablet location found by the metadata scan, otherwise the result of
 *         getDefaultTabletLocation(table); empty if an error occurs
 */
private Optional<String> getTabletLocation(String table, Key key) {
    try {
        // Get the Accumulo table ID so we can scan some fun stuff
        String tableId = connector.tableOperations().tableIdMap().get(table);

        Optional<String> location = Optional.empty();

        // Create our scanner against the metadata table, fetching 'loc' family
        Scanner scanner = connector.createScanner("accumulo.metadata", auths);
        try {
            scanner.fetchColumnFamily(new Text("loc"));

            // Set the scan range to just this table, from the table ID to the default tablet
            // row, which is the last listed tablet
            Key defaultTabletRow = new Key(tableId + '<');
            Key start = new Key(tableId);
            Key end = defaultTabletRow.followingKey(PartialKey.ROW);
            scanner.setRange(new Range(start, end));

            if (key == null) {
                // if the key is null, then it is -inf, so get first tablet location
                Iterator<Entry<Key, Value>> iter = scanner.iterator();
                if (iter.hasNext()) {
                    location = Optional.of(iter.next().getValue().toString());
                }
            } else {
                // Else, we will need to scan through the tablet location data and find the location

                // Create some text objects to do comparison for what we are looking for
                Text splitCompareKey = new Text();
                key.getRow(splitCompareKey);
                Text scannedCompareKey = new Text();

                // Scan the table!
                for (Entry<Key, Value> entry : scanner) {
                    // Get the bytes of the key
                    byte[] keyBytes = entry.getKey().getRow().copyBytes();

                    // If the last byte is <, then we have hit the default tablet, so use this location
                    if (keyBytes[keyBytes.length - 1] == '<') {
                        location = Optional.of(entry.getValue().toString());
                        break;
                    } else {
                        // Drop the first three bytes of the metadata row.
                        // NOTE(review): presumably this skips a "<tableId>;" prefix, which
                        // would only hold for one particular table ID length - confirm.
                        scannedCompareKey.set(keyBytes, 3, keyBytes.length - 3);

                        // Compare the keys, moving along the tablets until the location is found
                        // NOTE(review): a match does not stop the scan, so a later entry
                        // (ultimately the default tablet) overwrites it - confirm intended.
                        if (scannedCompareKey.getLength() > 0) {
                            int compareTo = splitCompareKey.compareTo(scannedCompareKey);
                            if (compareTo <= 0) {
                                location = Optional.of(entry.getValue().toString());
                            } else {
                                // all future tablets will be greater than this key
                                break;
                            }
                        }
                    }
                }
            }
        } finally {
            // Release the scanner on every path, including the null-key branch and failures.
            scanner.close();
        }

        // If we were unable to find the location for some reason, return the default tablet
        // location
        return location.isPresent() ? location : getDefaultTabletLocation(table);
    } catch (Exception e) {
        // Swallow this exception so the query does not fail due to being unable
        // to locate the tablet server for the provided Key.
        // This is purely an optimization, but we will want to log the error.
        LOG.error("Failed to get tablet location, returning dummy location", e);
        return Optional.empty();
    }
}

From source file:com.facebook.presto.accumulo.examples.TpcHClerkSearch.java

License:Apache License

/**
 * Looks up the orders for the clerk IDs given on the command line: scans the
 * index table for matching order IDs, then scans the data table for those
 * orders and prints each one to stdout as a pipe-delimited line.
 *
 * @param config Accumulo connection settings (instance, ZooKeepers, credentials)
 * @param cmd Parsed command line holding the clerk ID search terms
 * @return 0 in all non-exceptional cases, including when no orders are found
 * @throws InvalidParameterException if a search term does not match the clerk regex
 * @throws RuntimeException if a row contains an unknown column qualifier
 * @throws Exception on any other failure raised by the calls made here
 */
@Override
public int run(AccumuloConfig config, CommandLine cmd) throws Exception {
    String[] searchTerms = cmd.getOptionValues(CLERK_ID);

    ZooKeeperInstance inst = new ZooKeeperInstance(config.getInstance(), config.getZooKeepers());
    Connector conn = inst.getConnector(config.getUsername(), new PasswordToken(config.getPassword()));

    // Ensure both tables exist
    validateExists(conn, DATA_TABLE);
    validateExists(conn, INDEX_TABLE);

    long start = System.currentTimeMillis();

    // A list to hold one data-table range per order ID found in the index
    LinkedList<Range> orderIds = new LinkedList<Range>();

    // Create a scanner against the index table
    BatchScanner idxScanner = conn.createBatchScanner(INDEX_TABLE, new Authorizations(), 10);
    try {
        // Create a search Range from the command line args
        LinkedList<Range> searchRanges = new LinkedList<Range>();
        for (String searchTerm : searchTerms) {
            if (clerkRegex.matcher(searchTerm).matches()) {
                searchRanges.add(new Range(searchTerm));
            } else {
                throw new InvalidParameterException(
                        format("Search term %s does not match regex Clerk#[0-9]{9}", searchTerm));
            }
        }

        // Set the search ranges for our scanner
        idxScanner.setRanges(searchRanges);

        // The column qualifier of each index record is used as the order ID
        for (Map.Entry<Key, Value> record : idxScanner) {
            orderIds.add(new Range(record.getKey().getColumnQualifier()));
        }
    } finally {
        // Close the scanner even if a search term was rejected or the scan failed
        idxScanner.close();
    }

    // If no order IDs were found, log a message and return 0
    if (orderIds.isEmpty()) {
        System.out.println("Found no orders with the given Clerk ID(s)");
        return 0;
    } else {
        System.out.println(format("Searching data table for %d orders", orderIds.size()));
    }

    int numOrders = 0;

    // Initialize the batch scanner to scan the data table with
    // the previously found order IDs as the ranges
    BatchScanner dataScanner = conn.createBatchScanner(DATA_TABLE, new Authorizations(), 10);
    try {
        dataScanner.setRanges(orderIds);
        dataScanner.addScanIterator(new IteratorSetting(1, WholeRowIterator.class));

        Text row = new Text(); // The row ID
        Text colQual = new Text(); // The column qualifier of the current record

        // Process all of the records returned by the batch scanner
        for (Map.Entry<Key, Value> entry : dataScanner) {
            // Per-row values start out null so a column missing from this row
            // is never carried over from the previous row.
            Long custkey = null;
            String orderstatus = null;
            Double totalprice = null;
            Date orderdate = null;
            String orderpriority = null;
            String clerk = null;
            Long shippriority = null;
            String comment = null;

            entry.getKey().getRow(row);
            // Only the first getLength() bytes of Text.getBytes() are valid data
            Long orderkey = decode(Long.class, row.getBytes(), row.getLength());
            SortedMap<Key, Value> rowMap = WholeRowIterator.decodeRow(entry.getKey(), entry.getValue());
            for (Map.Entry<Key, Value> record : rowMap.entrySet()) {
                // Get the column qualifier from the record's key
                record.getKey().getColumnQualifier(colQual);

                switch (colQual.toString()) {
                case CUSTKEY_STR:
                    custkey = decode(Long.class, record.getValue().get());
                    break;
                case ORDERSTATUS_STR:
                    orderstatus = decode(String.class, record.getValue().get());
                    break;
                case TOTALPRICE_STR:
                    totalprice = decode(Double.class, record.getValue().get());
                    break;
                case ORDERDATE_STR:
                    orderdate = decode(Date.class, record.getValue().get());
                    break;
                case ORDERPRIORITY_STR:
                    orderpriority = decode(String.class, record.getValue().get());
                    break;
                case CLERK_STR:
                    clerk = decode(String.class, record.getValue().get());
                    break;
                case SHIPPRIORITY_STR:
                    shippriority = decode(Long.class, record.getValue().get());
                    break;
                case COMMENT_STR:
                    comment = decode(String.class, record.getValue().get());
                    break;
                default:
                    throw new RuntimeException("Unknown column qualifier " + colQual);
                }
            }

            ++numOrders;
            // Write the order's fields to stdout as one pipe-delimited line
            System.out.println(format("%d|%d|%s|%f|%s|%s|%s|%d|%s", orderkey, custkey, orderstatus, totalprice,
                    orderdate, orderpriority, clerk, shippriority, comment));
        }
    } finally {
        // Close the scanner even if decoding a row failed
        dataScanner.close();
    }

    long finish = System.currentTimeMillis();

    System.out.format("Found %d orders in %s ms\n", numOrders, (finish - start));
    return 0;
}

From source file:com.facebook.presto.hive.DwrfHiveRecordCursor.java

License:Apache License

/**
 * Loads the value of a string-like column into {@code slices[column]}.
 *
 * Marks the column as loaded, flags it as null when either the raw or the
 * materialized value is null, and otherwise stores JSON bytes for MAP, LIST
 * and STRUCT types or the first getLength() bytes of the Text/BytesWritable
 * for string and binary types.
 *
 * @param column index of the column to parse; must not be a partition column
 * @throws RuntimeException if the column's Hive type is none of the above
 */
private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;
    nulls[column] = false;

    // A null raw value is never materialized; both null cases mark the column null.
    OrcLazyObject rawValue = getRawValue(column);
    Object materialized = (rawValue == null) ? null : materializeValue(rawValue);
    if (materialized == null) {
        nulls[column] = true;
        return;
    }

    HiveType hiveType = hiveTypes[column];
    boolean isStructural = hiveType.getCategory() == Category.MAP || hiveType.getCategory() == Category.LIST
            || hiveType.getCategory() == Category.STRUCT;
    if (isStructural) {
        slices[column] = Slices
                .wrappedBuffer(getJsonBytes(sessionTimeZone, rawValue, fieldInspectors[column]));
        return;
    }
    if (hiveType.equals(HIVE_STRING)) {
        Text stringValue = checkWritable(materialized, Text.class);
        // Only the first getLength() bytes of getBytes() are valid data.
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(stringValue.getBytes()), 0,
                stringValue.getLength());
        return;
    }
    if (hiveType.equals(HIVE_BINARY)) {
        BytesWritable binaryValue = checkWritable(materialized, BytesWritable.class);
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(binaryValue.getBytes()), 0,
                binaryValue.getLength());
        return;
    }
    throw new RuntimeException(String.format("%s is not a valid STRING type", hiveType));
}

From source file:com.facebook.presto.hive.orc.DwrfHiveRecordCursor.java

License:Apache License

/**
 * Loads the value of a string or binary column into {@code slices[column]}.
 *
 * Marks the column as loaded, flags it as null when either the raw or the
 * materialized value is null, and otherwise stores the first getLength()
 * bytes of the Text/BytesWritable value.
 *
 * @param column index of the column to parse; must not be a partition column
 * @throws RuntimeException if the column's Hive type is neither string nor binary
 */
private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;
    nulls[column] = false;

    // A null raw value is never materialized; both null cases mark the column null.
    OrcLazyObject rawValue = getRawValue(column);
    Object materialized = (rawValue == null) ? null : materializeValue(rawValue);
    if (materialized == null) {
        nulls[column] = true;
        return;
    }

    HiveType hiveType = hiveTypes[column];
    if (hiveType.equals(HIVE_STRING)) {
        Text stringValue = checkWritable(materialized, Text.class);
        // Only the first getLength() bytes of getBytes() are valid data.
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(stringValue.getBytes()), 0,
                stringValue.getLength());
        return;
    }
    if (hiveType.equals(HIVE_BINARY)) {
        BytesWritable binaryValue = checkWritable(materialized, BytesWritable.class);
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(binaryValue.getBytes()), 0,
                binaryValue.getLength());
        return;
    }
    throw new RuntimeException(String.format("%s is not a valid STRING type", hiveType));
}

From source file:com.facebook.presto.hive.orc.OrcHiveRecordCursor.java

License:Apache License

/**
 * Loads the value of a string or binary column into {@code slices[column]}.
 *
 * Marks the column as loaded, flags it as null when the field value is null,
 * and otherwise stores the first getLength() bytes of the Text/BytesWritable
 * value.
 *
 * @param column index of the column to parse; must not be a partition column
 * @throws RuntimeException if the column's Hive type is neither string nor binary
 */
private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;
    nulls[column] = false;

    Object fieldValue = getFieldValue(row, hiveColumnIndexes[column]);
    if (fieldValue == null) {
        nulls[column] = true;
        return;
    }

    HiveType hiveType = hiveTypes[column];
    if (hiveType.equals(HIVE_STRING)) {
        Text stringValue = Types.checkType(fieldValue, Text.class, "materialized string value");
        // Only the first getLength() bytes of getBytes() are valid data.
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(stringValue.getBytes()), 0,
                stringValue.getLength());
        return;
    }
    if (hiveType.equals(HIVE_BINARY)) {
        BytesWritable binaryValue = Types.checkType(fieldValue, BytesWritable.class,
                "materialized binary value");
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(binaryValue.getBytes()), 0,
                binaryValue.getLength());
        return;
    }
    throw new RuntimeException(String.format("%s is not a valid STRING type", hiveType));
}

From source file:com.facebook.presto.hive.OrcHiveRecordCursor.java

License:Apache License

/**
 * Loads the value of a string-like column into {@code slices[column]}.
 *
 * Marks the column as loaded, flags it as null when the field value is null,
 * and otherwise stores JSON bytes for MAP, LIST and STRUCT types or the first
 * getLength() bytes of the Text/BytesWritable for string and binary types.
 *
 * @param column index of the column to parse; must not be a partition column
 * @throws RuntimeException if the column's Hive type is none of the above
 */
private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;
    nulls[column] = false;

    Object fieldValue = getFieldValue(row, hiveColumnIndexes[column]);
    if (fieldValue == null) {
        nulls[column] = true;
        return;
    }

    HiveType hiveType = hiveTypes[column];
    boolean isStructural = hiveType.getCategory() == Category.MAP || hiveType.getCategory() == Category.LIST
            || hiveType.getCategory() == Category.STRUCT;
    if (isStructural) {
        slices[column] = Slices.wrappedBuffer(getJsonBytes(sessionTimeZone, fieldValue, fieldInspectors[column]));
        return;
    }
    if (hiveType.equals(HIVE_STRING)) {
        Text stringValue = Types.checkType(fieldValue, Text.class, "materialized string value");
        // Only the first getLength() bytes of getBytes() are valid data.
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(stringValue.getBytes()), 0,
                stringValue.getLength());
        return;
    }
    if (hiveType.equals(HIVE_BINARY)) {
        BytesWritable binaryValue = Types.checkType(fieldValue, BytesWritable.class,
                "materialized binary value");
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(binaryValue.getBytes()), 0,
                binaryValue.getLength());
        return;
    }
    throw new RuntimeException(String.format("%s is not a valid STRING type", hiveType));
}