Example usage for org.apache.hadoop.streaming StreamKeyValUtil splitKeyVal

List of usage examples for org.apache.hadoop.streaming StreamKeyValUtil splitKeyVal

Introduction

In this page you can find the example usage for org.apache.hadoop.streaming StreamKeyValUtil splitKeyVal.

Prototype

public static void splitKeyVal(byte[] utf, int start, int length, Text key, Text val, int splitPos,
        int separatorLength) throws IOException 

Source Link

Document

Splits a UTF-8 byte array into a key and a value, assuming that the delimiter is at splitPos.

Usage

From source file:fm.last.darling.hbase.HBaseJSONOutputReader.java

License:Apache License

/**
 * Parses one output line into a row key and a {@code Put}, storing them in the
 * {@code rowkey} and {@code put} fields.
 *
 * <p>The line is split at the {@code numKeyFields}-th occurrence of
 * {@code separator}: the bytes before it become the key, the bytes after it the
 * value. The value is expected to be a JSON object wrapped in one extra
 * character on each side (presumably quotes - confirm against the emitter).
 * Each JSON entry is keyed by {@code "family:qualifier"} and maps to an object
 * carrying a {@code "value"} member, which is written to the {@code Put} as
 * UTF-8 bytes. Entries without a colon-separated family and qualifier, without
 * an object as their value, or without a {@code "value"} member are skipped.
 *
 * @param line   buffer holding the raw line bytes
 * @param length number of valid bytes in {@code line}
 * @throws IOException if the line cannot be decoded, if the value is too short
 *                     to hold a wrapped JSON payload (including when no
 *                     separator is found), or if the payload is not a JSON object
 */
private void interpretKeyandValue(byte[] line, int length) throws IOException {
    // Advance to the numKeyFields-th separator; pos stays -1 if there are too few.
    int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
    for (int field = 1; field < numKeyFields && pos != -1; field++) {
        pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length, length, separator);
    }

    Text keyText = new Text();
    Text valueText = new Text();
    try {
        if (pos == -1) {
            // No separator: the whole line is the key and the value is empty.
            keyText.set(line, 0, length);
            valueText.set("");
        } else {
            StreamKeyValUtil.splitKeyVal(line, 0, length, keyText, valueText, pos, separator.length);
        }
    } catch (CharacterCodingException e) {
        throw new IOException(e);
    }

    // removing a ' at the start and end of the key
    byte[] keyBytes = trimOuterBytes(keyText);

    rowkey = new ImmutableBytesWritable(keyBytes);
    put = new Put(keyBytes);

    // The payload is wrapped in one character on each side; anything shorter than
    // two characters (e.g. the empty value of a separator-less line) cannot be
    // unwrapped and previously failed with StringIndexOutOfBoundsException.
    String wrapped = valueText.toString();
    if (wrapped.length() < 2) {
        throw new IOException("error, value too short to hold a wrapped JSON payload (length "
                + wrapped.length() + ")");
    }
    String json = wrapped.substring(1, wrapped.length() - 1);

    Object parsed;
    try {
        parsed = ObjectBuilder.fromJSON(json);
    } catch (Exception e) {
        throw new IOException("error, fromJson: ", e);
    }
    if (!(parsed instanceof Map)) {
        throw new IOException("error, fromJson: expected a JSON object but got "
                + (parsed == null ? "null" : parsed.getClass().getName()));
    }

    for (Map.Entry<?, ?> entry : ((Map<?, ?>) parsed).entrySet()) {
        Object rawColumn = entry.getKey();
        Object rawCell = entry.getValue();
        if (!(rawColumn instanceof String) || !(rawCell instanceof Map)) {
            continue; // not a "family:qualifier" -> {...} entry
        }

        // let's consider not joining family and qualifier at emitter.
        // Split on the FIRST colon only, so a qualifier that itself contains ':'
        // is kept whole instead of being truncated.
        String cfq = (String) rawColumn;
        int colon = cfq.indexOf(':');
        if (colon < 0 || colon == cfq.length() - 1) {
            continue; // no family/qualifier pair
        }
        String family = cfq.substring(0, colon);
        String qualifier = cfq.substring(colon + 1);

        // expecting the cell object to carry 'value'; a 'timestamp' member, if
        // present, is currently ignored.
        Object value = ((Map<?, ?>) rawCell).get("value");
        if (value == null) {
            continue; // no good.
        }

        put.add(family.getBytes("UTF-8"), qualifier.getBytes("UTF-8"), value.toString().getBytes("UTF-8"));
    }
}