List of usage examples for org.apache.hadoop.io.Text.decode
public static String decode(byte[] utf8, int start, int length) throws CharacterCodingException
From source file:com.pagerankcalculator.ordering.PageRankSortingMapper.java
/**
 * Re-keys one tab-separated record by its PageRank value.
 *
 * <p>The record is read as {@code username<TAB>pageRank}, optionally followed by a second tab
 * and further content that is ignored. The emitted pair is {@code (pageRank, username)}.
 *
 * @param key     input key; not used by this mapper
 * @param value   the raw record bytes
 * @param context receives the emitted {@code (DoubleWritable, Text)} pair
 * @throws IOException           if the record contains no tab separator, if a field is not
 *                               decodable by {@code Text.decode}, or if the write fails
 * @throws InterruptedException  propagated from {@code context.write}
 * @throws NumberFormatException if the rank field is not a parsable double
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    int tabIdx1 = value.find("\t");
    if (tabIdx1 < 0) {
        // Fail with a descriptive error instead of a negative-length decode.
        throw new IOException("Malformed record, expected username<TAB>pageRank: " + value);
    }

    int rankStart = tabIdx1 + 1;
    int tabIdx2 = value.find("\t", rankStart);
    // Without a trailing tab the rank runs to the end of the valid bytes. getLength() is used
    // rather than the backing array length because the array may be larger than the content.
    int rankEnd = (tabIdx2 < 0) ? value.getLength() : tabIdx2;

    String username = Text.decode(value.getBytes(), 0, tabIdx1);
    // parseDouble avoids the deprecated boxing constructor new Double(String).
    double pageRank = Double.parseDouble(Text.decode(value.getBytes(), rankStart, rankEnd - rankStart));

    context.write(new DoubleWritable(pageRank), new Text(username));
}
From source file:com.snowplowanalytics.hive.serde.CfLogDeserializer.java
License:Open Source License
/** * Deserialize an object out of a Writable blob. In most cases, the return * value of this function will be constant since the function will reuse the * returned object. If the client wants to keep a copy of the object, the * client needs to clone the returned value by calling * ObjectInspectorUtils.getStandardObject(). * /*from w w w . j ava 2s .c o m*/ */ @Override public Object deserialize(Writable field) throws SerDeException { String row = null; if (field instanceof BytesWritable) { BytesWritable b = (BytesWritable) field; try { row = Text.decode(b.getBytes(), 0, b.getLength()); } catch (CharacterCodingException e) { throw new SerDeException(e); } } else if (field instanceof Text) { row = field.toString(); } try { // Construct and return the S3LogStruct from the row data cachedStruct.parse(row); return cachedStruct; } catch (ClassCastException e) { throw new SerDeException(this.getClass().getName() + " expects Text or BytesWritable", e); } catch (Exception e) { throw new SerDeException(e); } }
From source file:com.snowplowanalytics.snowplow.hadoop.hive.SnowPlowEventDeserializer.java
License:Open Source License
/** * Deserialize an object out of a Writable blob. In most cases, the return * value of this function will be constant since the function will reuse the * returned object. If the client wants to keep a copy of the object, the * client needs to clone the returned value by calling * ObjectInspectorUtils.getStandardObject(). * /*from w ww . j a va2s . com*/ * @param blob The Writable object containing a serialized object * @return A Java object representing the contents in the blob. * @throws SerDeException For any exception during initialization */ @Override public Object deserialize(Writable field) throws SerDeException { String row = null; if (field instanceof BytesWritable) { BytesWritable b = (BytesWritable) field; try { row = Text.decode(b.getBytes(), 0, b.getLength()); } catch (CharacterCodingException e) { throw new SerDeException(e); } } else if (field instanceof Text) { row = field.toString(); } try { // Update in place the S3LogStruct with the row data if (cachedStruct.updateByParsing(row)) return cachedStruct; else return null; } catch (ClassCastException e) { throw new SerDeException(this.getClass().getName() + " expects Text or BytesWritable", e); } catch (Exception e) { if (this.continueOnUnexpectedError) { LOG.error("Could not parse row: \"" + row + "\"", e); return null; } else throw new SerDeException(e); } }
From source file:cosmos.accumulo.OrderFilter.java
License:Apache License
@Override public boolean accept(Key k, Value v) { Preconditions.checkNotNull(_holder); k.getColumnQualifier(_holder);/*from w w w.j av a 2 s . com*/ int index = _holder.find(Defaults.NULL_BYTE_STR); // Found a null if (-1 != index) { try { String prefix = Text.decode(_holder.getBytes(), 0, index); return this.cqPrefix.equals(prefix); } catch (CharacterCodingException e) { throw new RuntimeException(e); } } return false; }
From source file:cosmos.impl.DedupingPredicate.java
License:Apache License
@Override public boolean apply(Entry<Key, Value> input) { Preconditions.checkNotNull(input);/*from w w w . j ava2 s . com*/ Preconditions.checkNotNull(input.getKey()); input.getKey().getColumnQualifier(holder); int index = holder.find(Defaults.NULL_BYTE_STR); Preconditions.checkArgument(-1 != index); String uid = null; try { uid = Text.decode(holder.getBytes(), index + 1, holder.getLength() - (index + 1)); } catch (CharacterCodingException e) { throw new RuntimeException(e); } // If we haven't seen this UID yet, note such, and then keep this item if (!uids.contains(uid)) { uids.add(uid); return true; } // Otherwise, don't re-return this item return false; }
From source file:cosmos.impl.GroupByFunction.java
License:Apache License
private String getValueFromKey(Key k) { Preconditions.checkNotNull(k);//from www . j a v a 2 s.c om k.getRow(_holder); int index = _holder.find(Defaults.NULL_BYTE_STR); if (-1 == index) { throw new IllegalArgumentException("Found no null byte in key: " + k); } try { return Text.decode(_holder.getBytes(), index + 1, _holder.getLength() - (index + 1)); } catch (CharacterCodingException e) { throw new IllegalArgumentException(e); } }
From source file:cosmos.impl.IndexToMultimapRecord.java
License:Apache License
@Override public MultimapRecord apply(Entry<Key, Value> input) { Key k = input.getKey();//w ww . j a v a 2 s .c o m Text colqual = k.getColumnQualifier(); int index = colqual.find(Defaults.NULL_BYTE_STR); if (-1 == index) { throw new RuntimeException("Was provided unexpected Key: " + k); } int start = index + 1; try { String docId = Text.decode(colqual.getBytes(), start, colqual.getLength() - start); return sorts.contents(id, docId); } catch (TableNotFoundException e) { throw new RuntimeException(e); } catch (UnexpectedStateException e) { throw new RuntimeException(e); } catch (CharacterCodingException e) { throw new RuntimeException(e); } }
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.Gram.java
License:Apache License
/**
 * Decodes the gram's term from {@code bytes}, starting at offset 1 and spanning
 * {@code length - 1} bytes.
 *
 * @return gram term string
 * @throws IllegalStateException if the stored bytes cannot be decoded; the original
 *                               {@code CharacterCodingException} is attached as the cause
 */
public String getString() {
    try {
        return Text.decode(bytes, 1, length - 1);
    } catch (CharacterCodingException e) {
        // Pass e as the cause so its stack trace is kept; the message text is unchanged.
        throw new IllegalStateException("Should not have happened " + e, e);
    }
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.GramKey.java
License:Apache License
/**
 * Decodes the primary string from {@code bytes}, starting at offset 1 and spanning
 * {@code primaryLength - 1} bytes.
 *
 * @return the decoded primary string
 * @throws IllegalStateException wrapping a {@code CharacterCodingException} if the bytes
 *                               cannot be decoded
 */
public String getPrimaryString() {
    final String primary;
    try {
        primary = Text.decode(bytes, 1, primaryLength - 1);
    } catch (CharacterCodingException cce) {
        throw new IllegalStateException(cce);
    }
    return primary;
}
From source file:edu.mit.ll.graphulo.pig.backend.GraphuloOneTableStorage.java
License:Apache License
@Override protected Tuple getTuple(Key key, Value value) throws IOException { SortedMap<Key, Value> rowKVs = WholeRowIterator.decodeRow(key, value); Tuple tuple = TupleFactory.getInstance().newTuple(columns.size() + 1); final Text cfHolder = new Text(); final Text cqHolder = new Text(); final Text row = key.getRow(); int tupleOffset = 0; tuple.set(tupleOffset, new DataByteArray(Text.decode(row.getBytes(), 0, row.getLength()))); for (Column column : this.columns) { tupleOffset++;//from www . ja v a2s . co m switch (column.getType()) { case LITERAL: cfHolder.set(column.getColumnFamily()); if (null != column.getColumnQualifier()) { cqHolder.set(column.getColumnQualifier()); } else { cqHolder.set(EMPTY_TEXT); } // Get the key where our literal would exist (accounting for // "colf:colq" or "colf:" empty colq) Key literalStartKey = new Key(row, cfHolder, cqHolder); SortedMap<Key, Value> tailMap = rowKVs.tailMap(literalStartKey); // Find the element if (tailMap.isEmpty()) { tuple.set(tupleOffset, EMPTY_DATA_BYTE_ARRAY); } else { Key actualKey = tailMap.firstKey(); // Only place it in the tuple if it matches the user // request, avoid using a value from a // key with the wrong colqual if (0 == literalStartKey.compareTo(actualKey, PartialKey.ROW_COLFAM_COLQUAL)) { tuple.set(tupleOffset, new DataByteArray(tailMap.get(actualKey).get())); } else { // This row doesn't have the column we were looking for tuple.set(tupleOffset, EMPTY_DATA_BYTE_ARRAY); } } break; case COLFAM_PREFIX: cfHolder.set(column.getColumnFamily()); Range colfamPrefixRange = Range.prefix(row, cfHolder); Key colfamPrefixStartKey = new Key(row, cfHolder); SortedMap<Key, Value> cfTailMap = rowKVs.tailMap(colfamPrefixStartKey); // Find the element if (cfTailMap.isEmpty()) { tuple.set(tupleOffset, EMPTY_DATA_BYTE_ARRAY); } else { HashMap<String, DataByteArray> tupleMap = new HashMap<String, DataByteArray>(); // Build up a map for all the entries in this row that match // the colfam prefix for (Entry<Key, Value> 
entry : cfTailMap.entrySet()) { if (colfamPrefixRange.contains(entry.getKey())) { entry.getKey().getColumnFamily(cfHolder); entry.getKey().getColumnQualifier(cqHolder); DataByteArray val = new DataByteArray(entry.getValue().get()); // Avoid adding an extra ':' when colqual is empty if (0 == cqHolder.getLength()) { tupleMap.put(cfHolder.toString(), val); } else { tupleMap.put(cfHolder.toString() + COLON + cqHolder.toString(), val); } } else { break; } } if (!tupleMap.isEmpty()) { tuple.set(tupleOffset, tupleMap); } } break; case COLQUAL_PREFIX: cfHolder.set(column.getColumnFamily()); cqHolder.set(column.getColumnQualifier()); Range colqualPrefixRange = Range.prefix(row, cfHolder, cqHolder); Key colqualPrefixStartKey = new Key(row, cfHolder, cqHolder); SortedMap<Key, Value> cqTailMap = rowKVs.tailMap(colqualPrefixStartKey); if (cqTailMap.isEmpty()) { tuple.set(tupleOffset, EMPTY_DATA_BYTE_ARRAY); } else { HashMap<String, DataByteArray> tupleMap = new HashMap<String, DataByteArray>(); // Build up a map for all the entries in this row that match // the colqual prefix for (Entry<Key, Value> entry : cqTailMap.entrySet()) { if (colqualPrefixRange.contains(entry.getKey())) { entry.getKey().getColumnFamily(cfHolder); entry.getKey().getColumnQualifier(cqHolder); DataByteArray val = new DataByteArray(entry.getValue().get()); // Avoid the extra ':' on empty colqual if (0 == cqHolder.getLength()) { tupleMap.put(cfHolder.toString(), val); } else { tupleMap.put(cfHolder.toString() + COLON + cqHolder.toString(), val); } } else { break; } } if (!tupleMap.isEmpty()) { tuple.set(tupleOffset, tupleMap); } } break; default: break; } } return tuple; }