Example usage for org.apache.hadoop.io Text decode

List of usage examples for org.apache.hadoop.io Text decode

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.decode.

Prototype

public static String decode(byte[] utf8, int start, int length) throws CharacterCodingException 

Source Link

Usage

From source file:com.pagerankcalculator.ordering.PageRankSortingMapper.java

/**
 * Emits one (pageRank, username) pair per tab-separated input record.
 *
 * The username is the text before the first tab; the page rank is the text
 * between the first tab and the second tab. When the record has no second
 * tab, the page rank runs to the end of the record's valid bytes.
 *
 * @param key     input key; not read by this method
 * @param value   the record, laid out as {@code username<TAB>pageRank[<TAB>...]}
 * @param context receives the (DoubleWritable pageRank, Text username) pair
 * @throws IOException          if decoding or writing fails
 * @throws InterruptedException propagated from {@code context.write}
 * @throws NumberFormatException if the page rank field is not a parsable double
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    int firstTab = value.find("\t");
    int secondTab = value.find("\t", firstTab + 1);

    String username = Text.decode(value.getBytes(), 0, firstTab);

    // find() yields -1 when there is no further tab. In that case the rank is
    // the last field, so stop at getLength() (the backing array returned by
    // getBytes() may be longer than the valid content).
    int rankStart = firstTab + 1;
    int rankEnd = (secondTab == -1) ? value.getLength() : secondTab;

    // parseDouble avoids the deprecated Double(String) constructor and the
    // box/unbox round trip.
    double pageRank = Double.parseDouble(Text.decode(value.getBytes(), rankStart, rankEnd - rankStart));

    context.write(new DoubleWritable(pageRank), new Text(username));
}

From source file:com.snowplowanalytics.hive.serde.CfLogDeserializer.java

License:Open Source License

/**
 * Deserialize an object out of a Writable blob. In most cases, the return
 * value of this function will be constant since the function will reuse the
 * returned object. If the client wants to keep a copy of the object, the
 * client needs to clone the returned value by calling
 * ObjectInspectorUtils.getStandardObject().
 * /*from  w  w  w  . j ava 2s .c  o  m*/
 */
@Override
public Object deserialize(Writable field) throws SerDeException {
    String row = null;
    if (field instanceof BytesWritable) {
        BytesWritable b = (BytesWritable) field;
        try {
            row = Text.decode(b.getBytes(), 0, b.getLength());
        } catch (CharacterCodingException e) {
            throw new SerDeException(e);
        }
    } else if (field instanceof Text) {
        row = field.toString();
    }
    try {
        // Construct and return the S3LogStruct from the row data
        cachedStruct.parse(row);
        return cachedStruct;
    } catch (ClassCastException e) {
        throw new SerDeException(this.getClass().getName() + " expects Text or BytesWritable", e);
    } catch (Exception e) {
        throw new SerDeException(e);
    }
}

From source file:com.snowplowanalytics.snowplow.hadoop.hive.SnowPlowEventDeserializer.java

License:Open Source License

/**
 * Deserialize an object out of a Writable blob. In most cases, the return
 * value of this function will be constant since the function will reuse the
 * returned object. If the client wants to keep a copy of the object, the
 * client needs to clone the returned value by calling
 * ObjectInspectorUtils.getStandardObject().
 * /*from   w ww  . j  a  va2s  .  com*/
 * @param blob The Writable object containing a serialized object
 * @return A Java object representing the contents in the blob.
 * @throws SerDeException For any exception during initialization
 */
@Override
public Object deserialize(Writable field) throws SerDeException {
    String row = null;
    if (field instanceof BytesWritable) {
        BytesWritable b = (BytesWritable) field;
        try {
            row = Text.decode(b.getBytes(), 0, b.getLength());
        } catch (CharacterCodingException e) {
            throw new SerDeException(e);
        }
    } else if (field instanceof Text) {
        row = field.toString();
    }
    try {
        // Update in place the S3LogStruct with the row data
        if (cachedStruct.updateByParsing(row))
            return cachedStruct;
        else
            return null;
    } catch (ClassCastException e) {
        throw new SerDeException(this.getClass().getName() + " expects Text or BytesWritable", e);
    } catch (Exception e) {
        if (this.continueOnUnexpectedError) {
            LOG.error("Could not parse row: \"" + row + "\"", e);
            return null;
        } else
            throw new SerDeException(e);
    }
}

From source file:cosmos.accumulo.OrderFilter.java

License:Apache License

/**
 * Accepts an entry when the part of its column qualifier that precedes the
 * first Defaults.NULL_BYTE_STR separator equals cqPrefix.
 *
 * @param k the key whose column qualifier is inspected
 * @param v the value; not read by this method
 * @return true if a separator is present and the leading part equals
 *         cqPrefix, false otherwise
 * @throws RuntimeException wrapping a CharacterCodingException if the
 *         leading bytes cannot be decoded
 */
@Override
public boolean accept(Key k, Value v) {
    Preconditions.checkNotNull(_holder);

    // Copy the qualifier into the reusable holder
    k.getColumnQualifier(_holder);

    final int separatorPos = _holder.find(Defaults.NULL_BYTE_STR);
    if (-1 == separatorPos) {
        // No separator found, so there is nothing to compare
        return false;
    }

    final String leading;
    try {
        leading = Text.decode(_holder.getBytes(), 0, separatorPos);
    } catch (CharacterCodingException e) {
        throw new RuntimeException(e);
    }
    return this.cqPrefix.equals(leading);
}

From source file:cosmos.impl.DedupingPredicate.java

License:Apache License

/**
 * Keeps an entry only the first time its UID is seen.
 *
 * The UID is the part of the column qualifier that follows the first
 * Defaults.NULL_BYTE_STR separator. Seen UIDs are recorded in uids.
 *
 * @param input the entry to test; it and its key must be non-null
 * @return true if the UID was not yet recorded (it is recorded now), false
 *         if it had been seen before
 * @throws IllegalArgumentException if the qualifier contains no separator
 * @throws RuntimeException wrapping a CharacterCodingException if the UID
 *         bytes cannot be decoded
 */
@Override
public boolean apply(Entry<Key, Value> input) {
    Preconditions.checkNotNull(input);
    Preconditions.checkNotNull(input.getKey());

    // Copy the qualifier into the reusable holder
    input.getKey().getColumnQualifier(holder);

    final int separatorPos = holder.find(Defaults.NULL_BYTE_STR);
    Preconditions.checkArgument(-1 != separatorPos);

    final int uidStart = separatorPos + 1;
    final String uid;
    try {
        uid = Text.decode(holder.getBytes(), uidStart, holder.getLength() - uidStart);
    } catch (CharacterCodingException e) {
        throw new RuntimeException(e);
    }

    // Already returned once: filter it out
    if (uids.contains(uid)) {
        return false;
    }

    // First sighting: remember the UID and keep the item
    uids.add(uid);
    return true;
}

From source file:cosmos.impl.GroupByFunction.java

License:Apache License

/**
 * Extracts the part of a key's row that follows the first
 * Defaults.NULL_BYTE_STR separator.
 *
 * @param k the key to read; must be non-null
 * @return the decoded text after the separator
 * @throws IllegalArgumentException if the row has no separator or the
 *         trailing bytes cannot be decoded
 */
private String getValueFromKey(Key k) {
    Preconditions.checkNotNull(k);

    // Copy the row into the reusable holder
    k.getRow(_holder);

    final int separatorPos = _holder.find(Defaults.NULL_BYTE_STR);
    if (separatorPos == -1) {
        throw new IllegalArgumentException("Found no null byte in key: " + k);
    }

    final int valueStart = separatorPos + 1;
    final int valueLength = _holder.getLength() - valueStart;
    try {
        return Text.decode(_holder.getBytes(), valueStart, valueLength);
    } catch (CharacterCodingException e) {
        throw new IllegalArgumentException(e);
    }
}

From source file:cosmos.impl.IndexToMultimapRecord.java

License:Apache License

/**
 * Resolves an index entry to its record.
 *
 * The document id is the part of the key's column qualifier that follows
 * the first Defaults.NULL_BYTE_STR separator; it is passed, together with
 * id, to sorts.contents.
 *
 * @param input the index entry
 * @return the result of sorts.contents(id, docId)
 * @throws RuntimeException if the qualifier has no separator, or wrapping
 *         any TableNotFoundException, UnexpectedStateException or
 *         CharacterCodingException raised while resolving
 */
@Override
public MultimapRecord apply(Entry<Key, Value> input) {
    final Key entryKey = input.getKey();
    final Text qualifier = entryKey.getColumnQualifier();

    final int separatorPos = qualifier.find(Defaults.NULL_BYTE_STR);
    if (separatorPos == -1) {
        throw new RuntimeException("Was provided unexpected Key: " + entryKey);
    }

    final int docIdStart = separatorPos + 1;
    final int docIdLength = qualifier.getLength() - docIdStart;
    try {
        final String docId = Text.decode(qualifier.getBytes(), docIdStart, docIdLength);
        return sorts.contents(id, docId);
    } catch (TableNotFoundException e) {
        throw new RuntimeException(e);
    } catch (UnexpectedStateException e) {
        throw new RuntimeException(e);
    } catch (CharacterCodingException e) {
        throw new RuntimeException(e);
    }
}

From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.Gram.java

License:Apache License

/**
 * Decodes the gram's term from {@code bytes}, starting at offset 1 and
 * covering {@code length - 1} bytes (the byte at offset 0 is skipped).
 *
 * @return gram term string
 * @throws IllegalStateException if the bytes cannot be decoded; the
 *         underlying CharacterCodingException is attached as the cause
 */
public String getString() {
    try {
        return Text.decode(bytes, 1, length - 1);
    } catch (CharacterCodingException e) {
        // Pass e as the cause so the original stack trace is not lost
        throw new IllegalStateException("Should not have happened " + e, e);
    }
}

From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.GramKey.java

License:Apache License

/**
 * Decodes the primary string from {@code bytes}, starting at offset 1 and
 * covering {@code primaryLength - 1} bytes (the byte at offset 0 is skipped).
 *
 * @return the decoded primary string
 * @throws IllegalStateException wrapping the CharacterCodingException if
 *         the bytes cannot be decoded
 */
public String getPrimaryString() {
    final int offset = 1;
    final int span = primaryLength - offset;
    try {
        return Text.decode(bytes, offset, span);
    } catch (CharacterCodingException e) {
        throw new IllegalStateException(e);
    }
}

From source file:edu.mit.ll.graphulo.pig.backend.GraphuloOneTableStorage.java

License:Apache License

/**
 * Builds a tuple from one entry produced by WholeRowIterator.
 *
 * Slot 0 receives the row id. Each configured column then fills the next
 * slot, in order, according to its type:
 * <ul>
 * <li>LITERAL: the value stored under exactly row/colfam/colqual, or
 * EMPTY_DATA_BYTE_ARRAY when the row has no such key;</li>
 * <li>COLFAM_PREFIX / COLQUAL_PREFIX: a map from "colfam" or
 * "colfam" + COLON + "colqual" to value for every entry matching the
 * prefix (see {@link #setPrefixMatches}).</li>
 * </ul>
 * Other column types leave their slot untouched.
 *
 * @param key   the encoded whole-row key
 * @param value the encoded whole-row value
 * @return a tuple with {@code columns.size() + 1} slots
 * @throws IOException if the row cannot be decoded or a slot cannot be set
 */
@Override
protected Tuple getTuple(Key key, Value value) throws IOException {
    SortedMap<Key, Value> rowKVs = WholeRowIterator.decodeRow(key, value);
    Tuple tuple = TupleFactory.getInstance().newTuple(columns.size() + 1);

    // Reused across columns to avoid allocating a Text per lookup
    final Text cfHolder = new Text();
    final Text cqHolder = new Text();
    final Text row = key.getRow();
    int tupleOffset = 0;

    tuple.set(tupleOffset, new DataByteArray(Text.decode(row.getBytes(), 0, row.getLength())));

    for (Column column : this.columns) {
        tupleOffset++;

        switch (column.getType()) {
        case LITERAL:
            cfHolder.set(column.getColumnFamily());
            if (null != column.getColumnQualifier()) {
                cqHolder.set(column.getColumnQualifier());
            } else {
                cqHolder.set(EMPTY_TEXT);
            }

            // Get the key where our literal would exist (accounting for
            // "colf:colq" or "colf:" empty colq)
            Key literalStartKey = new Key(row, cfHolder, cqHolder);

            SortedMap<Key, Value> tailMap = rowKVs.tailMap(literalStartKey);

            // Find the element
            if (tailMap.isEmpty()) {
                tuple.set(tupleOffset, EMPTY_DATA_BYTE_ARRAY);
            } else {
                Key actualKey = tailMap.firstKey();

                // Only place it in the tuple if it matches the user
                // request, avoid using a value from a
                // key with the wrong colqual
                if (0 == literalStartKey.compareTo(actualKey, PartialKey.ROW_COLFAM_COLQUAL)) {
                    tuple.set(tupleOffset, new DataByteArray(tailMap.get(actualKey).get()));
                } else {
                    // This row doesn't have the column we were looking for
                    tuple.set(tupleOffset, EMPTY_DATA_BYTE_ARRAY);
                }
            }

            break;
        case COLFAM_PREFIX:
            cfHolder.set(column.getColumnFamily());
            // The range and start key must be built before the helper runs,
            // because the helper overwrites the holders.
            Range colfamPrefixRange = Range.prefix(row, cfHolder);
            Key colfamPrefixStartKey = new Key(row, cfHolder);

            setPrefixMatches(tuple, tupleOffset, rowKVs.tailMap(colfamPrefixStartKey), colfamPrefixRange,
                    cfHolder, cqHolder);

            break;
        case COLQUAL_PREFIX:
            cfHolder.set(column.getColumnFamily());
            cqHolder.set(column.getColumnQualifier());
            Range colqualPrefixRange = Range.prefix(row, cfHolder, cqHolder);
            Key colqualPrefixStartKey = new Key(row, cfHolder, cqHolder);

            setPrefixMatches(tuple, tupleOffset, rowKVs.tailMap(colqualPrefixStartKey), colqualPrefixRange,
                    cfHolder, cqHolder);

            break;
        default:
            break;
        }
    }

    return tuple;
}

/**
 * Fills one tuple slot with all entries of {@code tailMap} that fall inside
 * {@code prefixRange}, keyed by "colfam" or "colfam" + COLON + "colqual".
 *
 * An empty {@code tailMap} sets the slot to EMPTY_DATA_BYTE_ARRAY. When
 * {@code tailMap} is non-empty but nothing matches, the slot is left
 * untouched.
 *
 * @param tuple       the tuple being populated
 * @param tupleOffset the slot to fill
 * @param tailMap     the row's entries starting at the prefix's first key
 * @param prefixRange the range an entry's key must lie in to be included
 * @param cfHolder    scratch Text, overwritten with each entry's column family
 * @param cqHolder    scratch Text, overwritten with each entry's column qualifier
 * @throws IOException if the slot cannot be set
 */
private void setPrefixMatches(Tuple tuple, int tupleOffset, SortedMap<Key, Value> tailMap, Range prefixRange,
        Text cfHolder, Text cqHolder) throws IOException {
    if (tailMap.isEmpty()) {
        tuple.set(tupleOffset, EMPTY_DATA_BYTE_ARRAY);
        return;
    }

    HashMap<String, DataByteArray> tupleMap = new HashMap<String, DataByteArray>();

    for (Entry<Key, Value> entry : tailMap.entrySet()) {
        if (!prefixRange.contains(entry.getKey())) {
            // Entries are sorted, so the first miss ends the matching run
            break;
        }

        entry.getKey().getColumnFamily(cfHolder);
        entry.getKey().getColumnQualifier(cqHolder);
        DataByteArray val = new DataByteArray(entry.getValue().get());

        // Avoid adding an extra ':' when colqual is empty
        if (0 == cqHolder.getLength()) {
            tupleMap.put(cfHolder.toString(), val);
        } else {
            tupleMap.put(cfHolder.toString() + COLON + cqHolder.toString(), val);
        }
    }

    if (!tupleMap.isEmpty()) {
        tuple.set(tupleOffset, tupleMap);
    }
}