Example usage for org.apache.hadoop.util UTF8ByteArrayUtils findBytes

List of usage examples for org.apache.hadoop.util UTF8ByteArrayUtils findBytes

Introduction

This page collects usage examples for the method org.apache.hadoop.util.UTF8ByteArrayUtils.findBytes.

Prototype

public static int findBytes(byte[] utf, int start, int end, byte[] b) 

Source Link

Document

Find the first occurrence of the given bytes b in a UTF-8 encoded string

Usage

From source file:edu.stolaf.cs.wmrserver.streaming.PipeMapRed.java

License:Apache License

/**
 * Split a line into key and value./*from w ww  .ja  v a  2 s.c  o m*/
 * @param line: a byte array of line containing UTF-8 bytes
 * @param key: key of a record
 * @param val: value of a record
 * @throws IOException
 */
void splitKeyVal(byte[] line, int length, Text key, Text val) throws IOException {
    int numKeyFields = getNumOfKeyFields();
    byte[] separator = getFieldSeparator();

    // Need to find numKeyFields separators
    int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
    for (int k = 1; k < numKeyFields && pos != -1; k++) {
        pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length, length, separator);
    }
    try {
        if (pos == -1) {
            key.set(line, 0, length);
            val.set("");
        } else {
            StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos, separator.length);
        }
    } catch (CharacterCodingException e) {
        LOG.warn(StringUtils.stringifyException(e));
    }
}

From source file:fm.last.darling.hbase.HBaseJSONOutputReader.java

License:Apache License

/**
 * Parses one output line into a row key and a {@code Put}.
 *
 * <p>The line is first split into a key and a value at the separator that
 * follows the configured number of key fields; if that separator cannot be
 * found, the whole line is the key and the value is empty. The key bytes
 * (after {@code trimOuterBytes}) become both {@code rowkey} and the row of a
 * new {@code put}. The value, with its first and last character dropped, is
 * parsed as JSON into a map whose keys have the form
 * {@code family:qualifier} and whose values are maps carrying a
 * {@code "value"} entry; each such entry is added to {@code put}.
 *
 * @param line   byte array holding the line
 * @param length number of bytes of {@code line} to consider, counted from index 0
 * @throws IOException if the key or value cannot be decoded, if the value is
 *                     too short to strip its first and last character, or if
 *                     the JSON payload cannot be parsed
 */
private void interpretKeyandValue(byte[] line, int length) throws IOException {
    // Need to find numKeyFields separators
    int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
    for (int found = 1; found < numKeyFields && pos != -1; found++) {
        pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length, length, separator);
    }

    Text k = new Text();
    Text v = new Text();
    try {
        if (pos == -1) {
            // Not enough separators: the entire line is the key, the value is empty.
            k.set(line, 0, length);
            v.set("");
        } else {
            StreamKeyValUtil.splitKeyVal(line, 0, length, k, v, pos, separator.length);
        }
    } catch (CharacterCodingException e) {
        throw new IOException(e);
    }

    // removing a ' at the start and end of the key
    // (presumably what trimOuterBytes does; its body is not visible here)
    byte[] keyBytes = trimOuterBytes(k);

    rowkey = new ImmutableBytesWritable(keyBytes);
    put = new Put(keyBytes);

    String tmpV = v.toString();
    // The first and last character of the value are dropped below, so the
    // value must have at least two characters. Without this guard an empty
    // value (e.g. a line without a separator) escapes as an unchecked
    // StringIndexOutOfBoundsException instead of the declared IOException.
    if (tmpV.length() < 2) {
        throw new IOException("error, value too short to strip its first and last character: '" + tmpV + "'");
    }
    String json = tmpV.substring(1, tmpV.length() - 1);
    Map<String, Map> payload;
    try {
        payload = (Map<String, Map>) ObjectBuilder.fromJSON(json); // the 'erased' type?
    } catch (Exception e) {
        throw new IOException("error, fromJson: ", e);
    }

    Set<Map.Entry<String, Map>> entries = payload.entrySet();
    for (Map.Entry<String, Map> entry : entries) {
        String cfq = entry.getKey(); // let's consider not joining family and qualifier at emitter.
        String[] parts = cfq.split(":");
        if (parts.length < 2) {
            continue; // no qualifier part, nothing to add.
        }
        String family = parts[0];
        String qualifier = parts[1];

        Map dict = entry.getValue(); // unchecked.

        // expecting dict to carry 'value'; a 'timestamp' entry, if present, is not used.
        Object value = dict.get("value");
        if (value == null) {
            continue; // no good.
        }

        put.add(family.getBytes("UTF-8"), qualifier.getBytes("UTF-8"), value.toString().getBytes("UTF-8"));
    }
}