List of usage examples for org.apache.hadoop.io.Text#getLength()
@Override public int getLength()
From source file:com.ebay.nest.io.sede.lazy.LazySimpleSerDe.java
License:Apache License
/**
 * Serialize one value into the output stream, recursing into nested values.
 *
 * <p>A {@code null} object, or a list/map/struct whose inspector returns
 * {@code null} data, is written as {@code nullSequence}. Other values are
 * written according to the category reported by {@code objInspector}:
 * <ul>
 * <li>PRIMITIVE: delegated to {@code LazyUtils.writePrimitiveUTF8}.</li>
 * <li>LIST: elements are joined with the separator for {@code level} and
 * serialized at {@code level + 1}.</li>
 * <li>MAP: entries are joined with the separator for {@code level}; key and
 * value are joined with the separator for {@code level + 1} and serialized
 * at {@code level + 2}.</li>
 * <li>STRUCT: fields are joined with the separator for {@code level} and
 * serialized at {@code level + 1}.</li>
 * <li>UNION: the tag is written as a byte primitive, followed by the
 * separator for {@code level} and the tagged field serialized at
 * {@code level + 1}.</li>
 * </ul>
 *
 * @param out
 *          The output stream to store the serialized data.
 * @param obj
 *          The object for the current field.
 * @param objInspector
 *          The ObjectInspector for the current Object.
 * @param separators
 *          The separators array.
 * @param level
 *          The current level of separator.
 * @param nullSequence
 *          The byte sequence representing the NULL value.
 * @param escaped
 *          Whether we need to escape the data when writing out
 * @param escapeChar
 *          Which char to use as the escape char, e.g. '\\'
 * @param needsEscape
 *          Which chars needs to be escaped. This array should have size of
 *          128. Negative byte values (or byte values >= 128) are never
 *          escaped.
 * @throws IOException
 *           declared by this method; presumably raised by writes to
 *           {@code out}
 * @throws SerDeException
 *           declared by this method; not thrown directly by the code below
 * @throws RuntimeException
 *           if the inspector reports a category not handled here
 */
public static void serialize(ByteStream.Output out, Object obj, ObjectInspector objInspector,
        byte[] separators, int level, Text nullSequence, boolean escaped, byte escapeChar,
        boolean[] needsEscape) throws IOException, SerDeException {

    if (obj == null) {
        out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        return;
    }

    char separator;
    List<?> list;
    switch (objInspector.getCategory()) {
    case PRIMITIVE:
        LazyUtils.writePrimitiveUTF8(out, obj, (PrimitiveObjectInspector) objInspector, escaped,
                escapeChar, needsEscape);
        return;
    case LIST:
        separator = (char) LazyUtils.getSeparator(separators, level);
        ListObjectInspector loi = (ListObjectInspector) objInspector;
        list = loi.getList(obj);
        ObjectInspector eoi = loi.getListElementObjectInspector();
        if (list == null) {
            out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        } else {
            for (int i = 0; i < list.size(); i++) {
                if (i > 0) {
                    out.write(separator);
                }
                serialize(out, list.get(i), eoi, separators, level + 1, nullSequence, escaped,
                        escapeChar, needsEscape);
            }
        }
        return;
    case MAP:
        separator = (char) LazyUtils.getSeparator(separators, level);
        char keyValueSeparator = (char) LazyUtils.getSeparator(separators, level + 1);
        MapObjectInspector moi = (MapObjectInspector) objInspector;
        ObjectInspector koi = moi.getMapKeyObjectInspector();
        ObjectInspector voi = moi.getMapValueObjectInspector();
        Map<?, ?> map = moi.getMap(obj);
        if (map == null) {
            out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        } else {
            boolean first = true;
            for (Map.Entry<?, ?> entry : map.entrySet()) {
                if (first) {
                    first = false;
                } else {
                    out.write(separator);
                }
                serialize(out, entry.getKey(), koi, separators, level + 2, nullSequence, escaped,
                        escapeChar, needsEscape);
                out.write(keyValueSeparator);
                serialize(out, entry.getValue(), voi, separators, level + 2, nullSequence, escaped,
                        escapeChar, needsEscape);
            }
        }
        return;
    case STRUCT:
        separator = (char) LazyUtils.getSeparator(separators, level);
        StructObjectInspector soi = (StructObjectInspector) objInspector;
        List<? extends StructField> fields = soi.getAllStructFieldRefs();
        list = soi.getStructFieldsDataAsList(obj);
        if (list == null) {
            out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        } else {
            for (int i = 0; i < list.size(); i++) {
                if (i > 0) {
                    out.write(separator);
                }
                serialize(out, list.get(i), fields.get(i).getFieldObjectInspector(), separators,
                        level + 1, nullSequence, escaped, escapeChar, needsEscape);
            }
        }
        return;
    case UNION:
        separator = (char) LazyUtils.getSeparator(separators, level);
        UnionObjectInspector uoi = (UnionObjectInspector) objInspector;
        List<? extends ObjectInspector> ois = uoi.getObjectInspectors();
        if (ois == null) {
            out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        } else {
            // Read the tag once: it is both written out and used to pick the
            // inspector for the tagged field.
            byte tag = uoi.getTag(obj);
            // Byte.valueOf replaces the deprecated new Byte(...) constructor.
            LazyUtils.writePrimitiveUTF8(out, Byte.valueOf(tag),
                    PrimitiveObjectInspectorFactory.javaByteObjectInspector, escaped, escapeChar,
                    needsEscape);
            out.write(separator);
            serialize(out, uoi.getField(obj), ois.get(tag), separators, level + 1, nullSequence,
                    escaped, escapeChar, needsEscape);
        }
        return;
    default:
        break;
    }

    throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
}
From source file:com.ebay.nest.io.sede.lazy.LazyStruct.java
License:Apache License
/** * Get the field out of the row without checking parsed. This is called by * both getField and getFieldsAsList./*from w w w . ja va 2s . c o m*/ * * @param fieldID * The id of the field starting from 0. * @param nullSequence * The sequence representing NULL value. * @return The value of the field */ private Object uncheckedGetField(int fieldID) { Text nullSequence = oi.getNullSequence(); // Test the length first so in most cases we avoid doing a byte[] // comparison. int fieldByteBegin = startPosition[fieldID]; int fieldLength = startPosition[fieldID + 1] - startPosition[fieldID] - 1; if ((fieldLength < 0) || (fieldLength == nullSequence.getLength() && LazyUtils.compare(bytes.getData(), fieldByteBegin, fieldLength, nullSequence.getBytes(), 0, nullSequence.getLength()) == 0)) { return null; } if (!fieldInited[fieldID]) { fieldInited[fieldID] = true; fields[fieldID].init(bytes, fieldByteBegin, fieldLength); } return fields[fieldID].getObject(); }
From source file:com.ebay.nest.io.sede.lazy.LazyUnion.java
License:Apache License
/** * Get the field out of the row without checking parsed. * * @return The value of the field/* w ww. ja va 2 s . c o m*/ */ private Object uncheckedGetField() { Text nullSequence = oi.getNullSequence(); int fieldLength = start + length - startPosition; if (fieldLength != 0 && fieldLength == nullSequence.getLength() && LazyUtils.compare(bytes.getData(), startPosition, fieldLength, nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) { return null; } if (!fieldInited) { fieldInited = true; field.init(bytes, startPosition, fieldLength); } return field.getObject(); }
From source file:com.ebay.nest.io.sede.lazy.LazyUtils.java
License:Apache License
/** * Write out the text representation of a Primitive Object to a UTF8 byte * stream./* w w w. ja v a 2 s. c o m*/ * * @param out * The UTF8 byte OutputStream * @param o * The primitive Object * @param needsEscape * Whether a character needs escaping. This array should have size of * 128. */ public static void writePrimitiveUTF8(OutputStream out, Object o, PrimitiveObjectInspector oi, boolean escaped, byte escapeChar, boolean[] needsEscape) throws IOException { switch (oi.getPrimitiveCategory()) { case BOOLEAN: { boolean b = ((BooleanObjectInspector) oi).get(o); if (b) { out.write(trueBytes, 0, trueBytes.length); } else { out.write(falseBytes, 0, falseBytes.length); } break; } case BYTE: { LazyInteger.writeUTF8(out, ((ByteObjectInspector) oi).get(o)); break; } case SHORT: { LazyInteger.writeUTF8(out, ((ShortObjectInspector) oi).get(o)); break; } case INT: { LazyInteger.writeUTF8(out, ((IntObjectInspector) oi).get(o)); break; } case LONG: { LazyLong.writeUTF8(out, ((LongObjectInspector) oi).get(o)); break; } case FLOAT: { float f = ((FloatObjectInspector) oi).get(o); ByteBuffer b = Text.encode(String.valueOf(f)); out.write(b.array(), 0, b.limit()); break; } case DOUBLE: { double d = ((DoubleObjectInspector) oi).get(o); ByteBuffer b = Text.encode(String.valueOf(d)); out.write(b.array(), 0, b.limit()); break; } case STRING: { Text t = ((StringObjectInspector) oi).getPrimitiveWritableObject(o); writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape); break; } case VARCHAR: { HiveVarcharWritable hc = ((HiveVarcharObjectInspector) oi).getPrimitiveWritableObject(o); Text t = hc.getTextValue(); writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape); break; } case BINARY: { BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o); byte[] toEncode = new byte[bw.getLength()]; System.arraycopy(bw.getBytes(), 0, toEncode, 0, bw.getLength()); byte[] toWrite = Base64.encodeBase64(toEncode); 
out.write(toWrite, 0, toWrite.length); break; } case DATE: { LazyDate.writeUTF8(out, ((DateObjectInspector) oi).getPrimitiveWritableObject(o)); break; } case TIMESTAMP: { LazyTimestamp.writeUTF8(out, ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o)); break; } case DECIMAL: { HiveDecimal bd = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); ByteBuffer b = Text.encode(bd.toString()); out.write(b.array(), 0, b.limit()); break; } default: { throw new RuntimeException("Hive internal error."); } } }
From source file:com.facebook.presto.accumulo.AccumuloClient.java
License:Apache License
/** * Gets the TabletServer hostname for where the given key is located in the given table * * @param table Fully-qualified table name * @param key Key to locate//w w w . j a v a 2 s. co m * @return The tablet location, or DUMMY_LOCATION if an error occurs */ private Optional<String> getTabletLocation(String table, Key key) { try { // Get the Accumulo table ID so we can scan some fun stuff String tableId = connector.tableOperations().tableIdMap().get(table); // Create our scanner against the metadata table, fetching 'loc' family Scanner scanner = connector.createScanner("accumulo.metadata", auths); scanner.fetchColumnFamily(new Text("loc")); // Set the scan range to just this table, from the table ID to the default tablet // row, which is the last listed tablet Key defaultTabletRow = new Key(tableId + '<'); Key start = new Key(tableId); Key end = defaultTabletRow.followingKey(PartialKey.ROW); scanner.setRange(new Range(start, end)); Optional<String> location = Optional.empty(); if (key == null) { // if the key is null, then it is -inf, so get first tablet location Iterator<Entry<Key, Value>> iter = scanner.iterator(); if (iter.hasNext()) { location = Optional.of(iter.next().getValue().toString()); } } else { // Else, we will need to scan through the tablet location data and find the location // Create some text objects to do comparison for what we are looking for Text splitCompareKey = new Text(); key.getRow(splitCompareKey); Text scannedCompareKey = new Text(); // Scan the table! 
for (Entry<Key, Value> entry : scanner) { // Get the bytes of the key byte[] keyBytes = entry.getKey().getRow().copyBytes(); // If the last byte is <, then we have hit the default tablet, so use this location if (keyBytes[keyBytes.length - 1] == '<') { location = Optional.of(entry.getValue().toString()); break; } else { // Chop off some magic nonsense scannedCompareKey.set(keyBytes, 3, keyBytes.length - 3); // Compare the keys, moving along the tablets until the location is found if (scannedCompareKey.getLength() > 0) { int compareTo = splitCompareKey.compareTo(scannedCompareKey); if (compareTo <= 0) { location = Optional.of(entry.getValue().toString()); } else { // all future tablets will be greater than this key break; } } } } scanner.close(); } // If we were unable to find the location for some reason, return the default tablet // location return location.isPresent() ? location : getDefaultTabletLocation(table); } catch (Exception e) { // Swallow this exception so the query does not fail due to being unable // to locate the tablet server for the provided Key. // This is purely an optimization, but we will want to log the error. LOG.error("Failed to get tablet location, returning dummy location", e); return Optional.empty(); } }
From source file:com.facebook.presto.accumulo.examples.TpcHClerkSearch.java
License:Apache License
@Override public int run(AccumuloConfig config, CommandLine cmd) throws Exception { String[] searchTerms = cmd.getOptionValues(CLERK_ID); ZooKeeperInstance inst = new ZooKeeperInstance(config.getInstance(), config.getZooKeepers()); Connector conn = inst.getConnector(config.getUsername(), new PasswordToken(config.getPassword())); // Ensure both tables exists validateExists(conn, DATA_TABLE);//from w w w .ja va2 s.c om validateExists(conn, INDEX_TABLE); long start = System.currentTimeMillis(); // Create a scanner against the index table BatchScanner idxScanner = conn.createBatchScanner(INDEX_TABLE, new Authorizations(), 10); LinkedList<Range> searchRanges = new LinkedList<Range>(); // Create a search Range from the command line args for (String searchTerm : searchTerms) { if (clerkRegex.matcher(searchTerm).matches()) { searchRanges.add(new Range(searchTerm)); } else { throw new InvalidParameterException( format("Search term %s does not match regex Clerk#[0-9]{9}", searchTerm)); } } // Set the search ranges for our scanner idxScanner.setRanges(searchRanges); // A list to hold all of the order IDs LinkedList<Range> orderIds = new LinkedList<Range>(); String orderId; // Process all of the records returned by the batch scanner for (Map.Entry<Key, Value> record : idxScanner) { // Get the order ID and add it to the list of order IDs orderIds.add(new Range(record.getKey().getColumnQualifier())); } // Close the batch scanner idxScanner.close(); // If clerkIDs is empty, log a message and return 0 if (orderIds.isEmpty()) { System.out.println("Found no orders with the given Clerk ID(s)"); return 0; } else { System.out.println(format("Searching data table for %d orders", orderIds.size())); } // Initialize the batch scanner to scan the data table with // the previously found order IDs as the ranges BatchScanner dataScanner = conn.createBatchScanner(DATA_TABLE, new Authorizations(), 10); dataScanner.setRanges(orderIds); dataScanner.addScanIterator(new IteratorSetting(1, 
WholeRowIterator.class)); Text row = new Text(); // The row ID Text colQual = new Text(); // The column qualifier of the current record Long orderkey = null; Long custkey = null; String orderstatus = null; Double totalprice = null; Date orderdate = null; String orderpriority = null; String clerk = null; Long shippriority = null; String comment = null; int numTweets = 0; // Process all of the records returned by the batch scanner for (Map.Entry<Key, Value> entry : dataScanner) { entry.getKey().getRow(row); orderkey = decode(Long.class, row.getBytes(), row.getLength()); SortedMap<Key, Value> rowMap = WholeRowIterator.decodeRow(entry.getKey(), entry.getValue()); for (Map.Entry<Key, Value> record : rowMap.entrySet()) { // Get the column qualifier from the record's key record.getKey().getColumnQualifier(colQual); switch (colQual.toString()) { case CUSTKEY_STR: custkey = decode(Long.class, record.getValue().get()); break; case ORDERSTATUS_STR: orderstatus = decode(String.class, record.getValue().get()); break; case TOTALPRICE_STR: totalprice = decode(Double.class, record.getValue().get()); break; case ORDERDATE_STR: orderdate = decode(Date.class, record.getValue().get()); break; case ORDERPRIORITY_STR: orderpriority = decode(String.class, record.getValue().get()); break; case CLERK_STR: clerk = decode(String.class, record.getValue().get()); break; case SHIPPRIORITY_STR: shippriority = decode(Long.class, record.getValue().get()); break; case COMMENT_STR: comment = decode(String.class, record.getValue().get()); break; default: throw new RuntimeException("Unknown column qualifier " + colQual); } } ++numTweets; // Write the screen name and text to stdout System.out.println(format("%d|%d|%s|%f|%s|%s|%s|%d|%s", orderkey, custkey, orderstatus, totalprice, orderdate, orderpriority, clerk, shippriority, comment)); custkey = null; shippriority = null; orderstatus = null; orderpriority = null; clerk = null; comment = null; totalprice = null; orderdate = null; } // Close the batch 
scanner dataScanner.close(); long finish = System.currentTimeMillis(); System.out.format("Found %d orders in %s ms\n", numTweets, (finish - start)); return 0; }
From source file:com.facebook.presto.hive.DwrfHiveRecordCursor.java
License:Apache License
private void parseStringColumn(int column) { // don't include column number in message because it causes boxing which is expensive here checkArgument(!isPartitionColumn[column], "Column is a partition key"); loaded[column] = true;// w ww .j av a 2 s . c o m nulls[column] = false; OrcLazyObject lazyObject = getRawValue(column); if (lazyObject == null) { nulls[column] = true; return; } Object value = materializeValue(lazyObject); if (value == null) { nulls[column] = true; return; } HiveType type = hiveTypes[column]; if (type.getCategory() == Category.MAP || type.getCategory() == Category.LIST || type.getCategory() == Category.STRUCT) { slices[column] = Slices .wrappedBuffer(getJsonBytes(sessionTimeZone, lazyObject, fieldInspectors[column])); } else if (type.equals(HIVE_STRING)) { Text text = checkWritable(value, Text.class); slices[column] = Slices.copyOf(Slices.wrappedBuffer(text.getBytes()), 0, text.getLength()); } else if (type.equals(HIVE_BINARY)) { BytesWritable bytesWritable = checkWritable(value, BytesWritable.class); slices[column] = Slices.copyOf(Slices.wrappedBuffer(bytesWritable.getBytes()), 0, bytesWritable.getLength()); } else { throw new RuntimeException(String.format("%s is not a valid STRING type", type)); } }
From source file:com.facebook.presto.hive.orc.DwrfHiveRecordCursor.java
License:Apache License
private void parseStringColumn(int column) { // don't include column number in message because it causes boxing which is expensive here checkArgument(!isPartitionColumn[column], "Column is a partition key"); loaded[column] = true;/*from www . ja v a 2 s. c o m*/ nulls[column] = false; OrcLazyObject lazyObject = getRawValue(column); if (lazyObject == null) { nulls[column] = true; return; } Object value = materializeValue(lazyObject); if (value == null) { nulls[column] = true; return; } HiveType type = hiveTypes[column]; if (type.equals(HIVE_STRING)) { Text text = checkWritable(value, Text.class); slices[column] = Slices.copyOf(Slices.wrappedBuffer(text.getBytes()), 0, text.getLength()); } else if (type.equals(HIVE_BINARY)) { BytesWritable bytesWritable = checkWritable(value, BytesWritable.class); slices[column] = Slices.copyOf(Slices.wrappedBuffer(bytesWritable.getBytes()), 0, bytesWritable.getLength()); } else { throw new RuntimeException(String.format("%s is not a valid STRING type", type)); } }
From source file:com.facebook.presto.hive.orc.OrcHiveRecordCursor.java
License:Apache License
private void parseStringColumn(int column) { // don't include column number in message because it causes boxing which is expensive here checkArgument(!isPartitionColumn[column], "Column is a partition key"); loaded[column] = true;//from w ww .ja v a 2 s . c om nulls[column] = false; Object object = getFieldValue(row, hiveColumnIndexes[column]); if (object == null) { nulls[column] = true; return; } HiveType type = hiveTypes[column]; if (type.equals(HIVE_STRING)) { Text text = Types.checkType(object, Text.class, "materialized string value"); slices[column] = Slices.copyOf(Slices.wrappedBuffer(text.getBytes()), 0, text.getLength()); } else if (type.equals(HIVE_BINARY)) { BytesWritable bytesWritable = Types.checkType(object, BytesWritable.class, "materialized binary value"); slices[column] = Slices.copyOf(Slices.wrappedBuffer(bytesWritable.getBytes()), 0, bytesWritable.getLength()); } else { throw new RuntimeException(String.format("%s is not a valid STRING type", type)); } }
From source file:com.facebook.presto.hive.OrcHiveRecordCursor.java
License:Apache License
private void parseStringColumn(int column) { // don't include column number in message because it causes boxing which is expensive here checkArgument(!isPartitionColumn[column], "Column is a partition key"); loaded[column] = true;//from w w w . jav a2s.co m nulls[column] = false; Object object = getFieldValue(row, hiveColumnIndexes[column]); if (object == null) { nulls[column] = true; return; } HiveType type = hiveTypes[column]; if (type.getCategory() == Category.MAP || type.getCategory() == Category.LIST || type.getCategory() == Category.STRUCT) { slices[column] = Slices.wrappedBuffer(getJsonBytes(sessionTimeZone, object, fieldInspectors[column])); } else if (type.equals(HIVE_STRING)) { Text text = Types.checkType(object, Text.class, "materialized string value"); slices[column] = Slices.copyOf(Slices.wrappedBuffer(text.getBytes()), 0, text.getLength()); } else if (type.equals(HIVE_BINARY)) { BytesWritable bytesWritable = Types.checkType(object, BytesWritable.class, "materialized binary value"); slices[column] = Slices.copyOf(Slices.wrappedBuffer(bytesWritable.getBytes()), 0, bytesWritable.getLength()); } else { throw new RuntimeException(String.format("%s is not a valid STRING type", type)); } }