Java tutorial: org.apache.cassandra.hadoop.pig.CassandraStorage (a deprecated Thrift-based Pig LoadFunc/StoreFunc)
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.hadoop.pig;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.util.*;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.cassandra.auth.PasswordAuthenticator;
import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.config.ColumnDefinition;
import org.apache.cassandra.db.marshal.*;
import org.apache.cassandra.db.SystemKeyspace;
import org.apache.cassandra.exceptions.ConfigurationException;
import org.apache.cassandra.exceptions.InvalidRequestException;
import org.apache.cassandra.exceptions.SyntaxException;
import org.apache.cassandra.hadoop.ColumnFamilyRecordReader;
import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.hadoop.HadoopCompat;
import org.apache.cassandra.schema.LegacySchemaTables;
import org.apache.cassandra.serializers.CollectionSerializer;
import org.apache.cassandra.thrift.*;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.Hex;
import org.apache.cassandra.utils.UUIDGen;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.*;

import org.apache.pig.Expression;
import org.apache.pig.LoadFunc;
import org.apache.pig.LoadMetadata;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceStatistics;
import org.apache.pig.StoreFuncInterface;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.*;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;

import org.apache.thrift.TDeserializer;
import org.apache.thrift.TException;
import org.apache.thrift.TSerializer;
import org.apache.thrift.protocol.TBinaryProtocol;

/**
 * A LoadStoreFunc for retrieving data from and storing data to Cassandra
 *
 * A row from a standard CF will be returned as nested tuples:
 * (key, ((name1, val1), (name2, val2))).
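 *
 * For example, a row with key 'k1' and columns c1=v1 and c2=v2 comes back roughly as
 * (k1, ((c1, v1), (c2, v2))); columns with declared metadata are additionally emitted as
 * named tuples ahead of the bag of remaining columns (see getNext() and getSchema()).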
*/ @Deprecated public class CassandraStorage extends LoadFunc implements StoreFuncInterface, LoadMetadata { public final static String PIG_ALLOW_DELETES = "PIG_ALLOW_DELETES"; public final static String PIG_WIDEROW_INPUT = "PIG_WIDEROW_INPUT"; public final static String PIG_USE_SECONDARY = "PIG_USE_SECONDARY"; private final static ByteBuffer BOUND = ByteBufferUtil.EMPTY_BYTE_BUFFER; private static final Logger logger = LoggerFactory.getLogger(CassandraStorage.class); private ByteBuffer slice_start = BOUND; private ByteBuffer slice_end = BOUND; private boolean slice_reverse = false; private boolean allow_deletes = false; private RecordReader<ByteBuffer, Map<ByteBuffer, ColumnFamilyRecordReader.Column>> reader; private RecordWriter<ByteBuffer, List<Mutation>> writer; private boolean widerows = false; private int limit; protected String DEFAULT_INPUT_FORMAT; protected String DEFAULT_OUTPUT_FORMAT; protected enum MarshallerType { COMPARATOR, DEFAULT_VALIDATOR, KEY_VALIDATOR, SUBCOMPARATOR }; protected String username; protected String password; protected String keyspace; protected String column_family; protected String loadSignature; protected String storeSignature; protected Configuration conf; protected String inputFormatClass; protected String outputFormatClass; protected int splitSize = 64 * 1024; protected String partitionerClass; protected boolean usePartitionFilter = false; protected String initHostAddress; protected String rpcPort; protected int nativeProtocolVersion = 1; // wide row hacks private ByteBuffer lastKey; private Map<ByteBuffer, ColumnFamilyRecordReader.Column> lastRow; private boolean hasNext = true; public CassandraStorage() { this(1024); } /**@param limit number of columns to fetch in a slice */ public CassandraStorage(int limit) { super(); this.limit = limit; DEFAULT_INPUT_FORMAT = "org.apache.cassandra.hadoop.ColumnFamilyInputFormat"; DEFAULT_OUTPUT_FORMAT = "org.apache.cassandra.hadoop.ColumnFamilyOutputFormat"; } public int getLimit() { return limit; } public void prepareToRead(RecordReader reader, PigSplit split) { this.reader = reader; } /** read wide row*/ public Tuple getNextWide() throws IOException { CfDef cfDef = getCfDef(loadSignature); ByteBuffer key = null; Tuple tuple = null; DefaultDataBag bag = new DefaultDataBag(); try { while (true) { hasNext = reader.nextKeyValue(); if (!hasNext) { if (tuple == null) tuple = TupleFactory.getInstance().newTuple(); if (lastRow != null) { if (tuple.size() == 0) // lastRow is a new one { key = reader.getCurrentKey(); tuple = keyToTuple(key, cfDef, parseType(cfDef.getKey_validation_class())); } for (Map.Entry<ByteBuffer, ColumnFamilyRecordReader.Column> entry : lastRow.entrySet()) { bag.add(columnToTuple(entry.getValue(), cfDef, parseType(cfDef.getComparator_type()))); } lastKey = null; lastRow = null; tuple.append(bag); return tuple; } else { if (tuple.size() == 1) // rare case of just one wide row, key already set { tuple.append(bag); return tuple; } else return null; } } if (key != null && !(reader.getCurrentKey()).equals(key)) // key changed { // read too much, hold on to it for next time lastKey = reader.getCurrentKey(); lastRow = reader.getCurrentValue(); // but return what we have so far tuple.append(bag); return tuple; } if (key == null) // only set the key on the first iteration { key = reader.getCurrentKey(); if (lastKey != null && !(key.equals(lastKey))) // last key only had one value { if (tuple == null) tuple = keyToTuple(lastKey, cfDef, parseType(cfDef.getKey_validation_class())); else addKeyToTuple(tuple, 
lastKey, cfDef, parseType(cfDef.getKey_validation_class())); for (Map.Entry<ByteBuffer, ColumnFamilyRecordReader.Column> entry : lastRow.entrySet()) { bag.add(columnToTuple(entry.getValue(), cfDef, parseType(cfDef.getComparator_type()))); } tuple.append(bag); lastKey = key; lastRow = reader.getCurrentValue(); return tuple; } if (tuple == null) tuple = keyToTuple(key, cfDef, parseType(cfDef.getKey_validation_class())); else addKeyToTuple(tuple, lastKey, cfDef, parseType(cfDef.getKey_validation_class())); } SortedMap<ByteBuffer, ColumnFamilyRecordReader.Column> row = (SortedMap<ByteBuffer, ColumnFamilyRecordReader.Column>) reader .getCurrentValue(); if (lastRow != null) // prepend what was read last time { for (Map.Entry<ByteBuffer, ColumnFamilyRecordReader.Column> entry : lastRow.entrySet()) { bag.add(columnToTuple(entry.getValue(), cfDef, parseType(cfDef.getComparator_type()))); } lastKey = null; lastRow = null; } for (Map.Entry<ByteBuffer, ColumnFamilyRecordReader.Column> entry : row.entrySet()) { bag.add(columnToTuple(entry.getValue(), cfDef, parseType(cfDef.getComparator_type()))); } } } catch (InterruptedException e) { throw new IOException(e.getMessage()); } } /** read next row */ public Tuple getNext() throws IOException { if (widerows) return getNextWide(); try { // load the next pair if (!reader.nextKeyValue()) return null; CfDef cfDef = getCfDef(loadSignature); ByteBuffer key = reader.getCurrentKey(); Map<ByteBuffer, ColumnFamilyRecordReader.Column> cf = reader.getCurrentValue(); assert key != null && cf != null; // output tuple, will hold the key, each indexed column in a tuple, then a bag of the rest // NOTE: we're setting the tuple size here only for the key so we can use setTupleValue on it Tuple tuple = keyToTuple(key, cfDef, parseType(cfDef.getKey_validation_class())); DefaultDataBag bag = new DefaultDataBag(); // we must add all the indexed columns first to match the schema Map<ByteBuffer, Boolean> added = new HashMap<ByteBuffer, Boolean>(cfDef.column_metadata.size()); // take care to iterate these in the same order as the schema does for (ColumnDef cdef : cfDef.column_metadata) { boolean hasColumn = false; boolean cql3Table = false; try { hasColumn = cf.containsKey(cdef.name); } catch (Exception e) { cql3Table = true; } if (hasColumn) { tuple.append(columnToTuple(cf.get(cdef.name), cfDef, parseType(cfDef.getComparator_type()))); } else if (!cql3Table) { // otherwise, we need to add an empty tuple to take its place tuple.append(TupleFactory.getInstance().newTuple()); } added.put(cdef.name, true); } // now add all the other columns for (Map.Entry<ByteBuffer, ColumnFamilyRecordReader.Column> entry : cf.entrySet()) { if (!added.containsKey(entry.getKey())) bag.add(columnToTuple(entry.getValue(), cfDef, parseType(cfDef.getComparator_type()))); } tuple.append(bag); // finally, special top-level indexes if needed if (usePartitionFilter) { for (ColumnDef cdef : getIndexes()) { Tuple throwaway = columnToTuple(cf.get(cdef.name), cfDef, parseType(cfDef.getComparator_type())); tuple.append(throwaway.get(1)); } } return tuple; } catch (InterruptedException e) { throw new IOException(e.getMessage()); } } /** write next row */ public void putNext(Tuple t) throws IOException { /* We support two cases for output: First, the original output: (key, (name, value), (name,value), {(name,value)}) (tuples or bag is optional) For supers, we only accept the original output. 
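   e.g. (rowkey, (col1, val1), {(col2, val2), (col3, val3)}).
   A null value inside a (name, value) pair is treated as a column deletion, but only when deletes
   have been enabled via PIG_ALLOW_DELETES or allow_deletes=true in the URL (see mutationFromTuple).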
*/ if (t.size() < 1) { // simply nothing here, we can't even delete without a key logger.warn("Empty output skipped, filter empty tuples to suppress this warning"); return; } ByteBuffer key = objToBB(t.get(0)); if (t.getType(1) == DataType.TUPLE) writeColumnsFromTuple(key, t, 1); else if (t.getType(1) == DataType.BAG) { if (t.size() > 2) throw new IOException("No arguments allowed after bag"); writeColumnsFromBag(key, (DataBag) t.get(1)); } else throw new IOException("Second argument in output must be a tuple or bag"); } /** set hadoop cassandra connection settings */ protected void setConnectionInformation() throws IOException { StorageHelper.setConnectionInformation(conf); if (System.getenv(StorageHelper.PIG_INPUT_FORMAT) != null) inputFormatClass = getFullyQualifiedClassName(System.getenv(StorageHelper.PIG_INPUT_FORMAT)); else inputFormatClass = DEFAULT_INPUT_FORMAT; if (System.getenv(StorageHelper.PIG_OUTPUT_FORMAT) != null) outputFormatClass = getFullyQualifiedClassName(System.getenv(StorageHelper.PIG_OUTPUT_FORMAT)); else outputFormatClass = DEFAULT_OUTPUT_FORMAT; if (System.getenv(PIG_ALLOW_DELETES) != null) allow_deletes = Boolean.parseBoolean(System.getenv(PIG_ALLOW_DELETES)); } /** get the full class name */ protected String getFullyQualifiedClassName(String classname) { return classname.contains(".") ? classname : "org.apache.cassandra.hadoop." + classname; } /** set read configuration settings */ public void setLocation(String location, Job job) throws IOException { conf = HadoopCompat.getConfiguration(job); setLocationFromUri(location); if (ConfigHelper.getInputSlicePredicate(conf) == null) { SliceRange range = new SliceRange(slice_start, slice_end, slice_reverse, limit); SlicePredicate predicate = new SlicePredicate().setSlice_range(range); ConfigHelper.setInputSlicePredicate(conf, predicate); } if (System.getenv(PIG_WIDEROW_INPUT) != null) widerows = Boolean.parseBoolean(System.getenv(PIG_WIDEROW_INPUT)); if (System.getenv(PIG_USE_SECONDARY) != null) usePartitionFilter = Boolean.parseBoolean(System.getenv(PIG_USE_SECONDARY)); if (System.getenv(StorageHelper.PIG_INPUT_SPLIT_SIZE) != null) { try { ConfigHelper.setInputSplitSize(conf, Integer.parseInt(System.getenv(StorageHelper.PIG_INPUT_SPLIT_SIZE))); } catch (NumberFormatException e) { throw new IOException("PIG_INPUT_SPLIT_SIZE is not a number", e); } } if (usePartitionFilter && getIndexExpressions() != null) ConfigHelper.setInputRange(conf, getIndexExpressions()); if (username != null && password != null) ConfigHelper.setInputKeyspaceUserNameAndPassword(conf, username, password); if (splitSize > 0) ConfigHelper.setInputSplitSize(conf, splitSize); if (partitionerClass != null) ConfigHelper.setInputPartitioner(conf, partitionerClass); if (rpcPort != null) ConfigHelper.setInputRpcPort(conf, rpcPort); if (initHostAddress != null) ConfigHelper.setInputInitialAddress(conf, initHostAddress); ConfigHelper.setInputColumnFamily(conf, keyspace, column_family, widerows); setConnectionInformation(); if (ConfigHelper.getInputRpcPort(conf) == 0) throw new IOException("PIG_INPUT_RPC_PORT or PIG_RPC_PORT environment variable not set"); if (ConfigHelper.getInputInitialAddress(conf) == null) throw new IOException("PIG_INPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set"); if (ConfigHelper.getInputPartitioner(conf) == null) throw new IOException("PIG_INPUT_PARTITIONER or PIG_PARTITIONER environment variable not set"); if (loadSignature == null) loadSignature = location; initSchema(loadSignature); } /** set store 
configuration settings */ public void setStoreLocation(String location, Job job) throws IOException { conf = HadoopCompat.getConfiguration(job); // don't combine mappers to a single mapper per node conf.setBoolean("pig.noSplitCombination", true); setLocationFromUri(location); if (username != null && password != null) ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password); if (splitSize > 0) ConfigHelper.setInputSplitSize(conf, splitSize); if (partitionerClass != null) ConfigHelper.setOutputPartitioner(conf, partitionerClass); if (rpcPort != null) { ConfigHelper.setOutputRpcPort(conf, rpcPort); ConfigHelper.setInputRpcPort(conf, rpcPort); } if (initHostAddress != null) { ConfigHelper.setOutputInitialAddress(conf, initHostAddress); ConfigHelper.setInputInitialAddress(conf, initHostAddress); } ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family); setConnectionInformation(); if (ConfigHelper.getOutputRpcPort(conf) == 0) throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set"); if (ConfigHelper.getOutputInitialAddress(conf) == null) throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set"); if (ConfigHelper.getOutputPartitioner(conf) == null) throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set"); // we have to do this again here for the check in writeColumnsFromTuple if (System.getenv(PIG_USE_SECONDARY) != null) usePartitionFilter = Boolean.parseBoolean(System.getenv(PIG_USE_SECONDARY)); initSchema(storeSignature); } /** Methods to get the column family schema from Cassandra */ protected void initSchema(String signature) throws IOException { Properties properties = UDFContext.getUDFContext().getUDFProperties(CassandraStorage.class); // Only get the schema if we haven't already gotten it if (!properties.containsKey(signature)) { try { Cassandra.Client client = ConfigHelper.getClientFromInputAddressList(conf); client.set_keyspace(keyspace); if (username != null && password != null) { Map<String, String> credentials = new HashMap<String, String>(2); credentials.put(PasswordAuthenticator.USERNAME_KEY, username); credentials.put(PasswordAuthenticator.PASSWORD_KEY, password); try { client.login(new AuthenticationRequest(credentials)); } catch (AuthenticationException e) { logger.error("Authentication exception: invalid username and/or password"); throw new IOException(e); } } // compose the CfDef for the columfamily CfDef cfDef = getCfDef(client); if (cfDef != null) { StringBuilder sb = new StringBuilder(); sb.append(cfdefToString(cfDef)); properties.setProperty(signature, sb.toString()); } else throw new IOException( String.format("Table '%s' not found in keyspace '%s'", column_family, keyspace)); } catch (Exception e) { throw new IOException(e); } } } public void checkSchema(ResourceSchema schema) throws IOException { // we don't care about types, they all get casted to ByteBuffers } /** define the schema */ public ResourceSchema getSchema(String location, Job job) throws IOException { setLocation(location, job); CfDef cfDef = getCfDef(loadSignature); if (cfDef.column_type.equals("Super")) return null; /* Our returned schema should look like this: (key, index1:(name, value), index2:(name, value), columns:{(name, value)}) Which is to say, columns that have metadata will be returned as named tuples, but unknown columns will go into a bag. This way, wide rows can still be handled by the bag, but known columns can easily be referenced. 
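   For instance, a table whose only declared column is 'age' would be described roughly as
   (key, age: (name, value), columns: {(name, value)}), with the concrete Pig field types derived
   from the key validator, comparator and column validators via StorageHelper.getPigType().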
*/ // top-level schema, no type ResourceSchema schema = new ResourceSchema(); // get default marshallers and validators Map<MarshallerType, AbstractType> marshallers = getDefaultMarshallers(cfDef); Map<ByteBuffer, AbstractType> validators = getValidatorMap(cfDef); // add key ResourceFieldSchema keyFieldSchema = new ResourceFieldSchema(); keyFieldSchema.setName("key"); keyFieldSchema.setType(StorageHelper.getPigType(marshallers.get(MarshallerType.KEY_VALIDATOR))); ResourceSchema bagSchema = new ResourceSchema(); ResourceFieldSchema bagField = new ResourceFieldSchema(); bagField.setType(DataType.BAG); bagField.setName("columns"); // inside the bag, place one tuple with the default comparator/validator schema ResourceSchema bagTupleSchema = new ResourceSchema(); ResourceFieldSchema bagTupleField = new ResourceFieldSchema(); bagTupleField.setType(DataType.TUPLE); ResourceFieldSchema bagcolSchema = new ResourceFieldSchema(); ResourceFieldSchema bagvalSchema = new ResourceFieldSchema(); bagcolSchema.setName("name"); bagvalSchema.setName("value"); bagcolSchema.setType(StorageHelper.getPigType(marshallers.get(MarshallerType.COMPARATOR))); bagvalSchema.setType(StorageHelper.getPigType(marshallers.get(MarshallerType.DEFAULT_VALIDATOR))); bagTupleSchema.setFields(new ResourceFieldSchema[] { bagcolSchema, bagvalSchema }); bagTupleField.setSchema(bagTupleSchema); bagSchema.setFields(new ResourceFieldSchema[] { bagTupleField }); bagField.setSchema(bagSchema); // will contain all fields for this schema List<ResourceFieldSchema> allSchemaFields = new ArrayList<ResourceFieldSchema>(); // add the key first, then the indexed columns, and finally the bag allSchemaFields.add(keyFieldSchema); if (!widerows) { // defined validators/indexes for (ColumnDef cdef : cfDef.column_metadata) { // make a new tuple for each col/val pair ResourceSchema innerTupleSchema = new ResourceSchema(); ResourceFieldSchema innerTupleField = new ResourceFieldSchema(); innerTupleField.setType(DataType.TUPLE); innerTupleField.setSchema(innerTupleSchema); innerTupleField.setName(new String(cdef.getName())); ResourceFieldSchema idxColSchema = new ResourceFieldSchema(); idxColSchema.setName("name"); idxColSchema.setType(StorageHelper.getPigType(marshallers.get(MarshallerType.COMPARATOR))); ResourceFieldSchema valSchema = new ResourceFieldSchema(); AbstractType validator = validators.get(cdef.name); if (validator == null) validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR); valSchema.setName("value"); valSchema.setType(StorageHelper.getPigType(validator)); innerTupleSchema.setFields(new ResourceFieldSchema[] { idxColSchema, valSchema }); allSchemaFields.add(innerTupleField); } } // bag at the end for unknown columns allSchemaFields.add(bagField); // add top-level index elements if needed if (usePartitionFilter) { for (ColumnDef cdef : getIndexes()) { ResourceFieldSchema idxSchema = new ResourceFieldSchema(); idxSchema.setName("index_" + new String(cdef.getName())); AbstractType validator = validators.get(cdef.name); if (validator == null) validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR); idxSchema.setType(StorageHelper.getPigType(validator)); allSchemaFields.add(idxSchema); } } // top level schema contains everything schema.setFields(allSchemaFields.toArray(new ResourceFieldSchema[allSchemaFields.size()])); return schema; } /** set partition filter */ public void setPartitionFilter(Expression partitionFilter) throws IOException { UDFContext context = UDFContext.getUDFContext(); Properties property = 
context.getUDFProperties(CassandraStorage.class); property.setProperty(StorageHelper.PARTITION_FILTER_SIGNATURE, indexExpressionsToString(filterToIndexExpressions(partitionFilter))); } /** prepare writer */ public void prepareToWrite(RecordWriter writer) { this.writer = writer; } /** convert object to ByteBuffer */ protected ByteBuffer objToBB(Object o) { if (o == null) return nullToBB(); if (o instanceof java.lang.String) return ByteBuffer.wrap(new DataByteArray((String) o).get()); if (o instanceof Integer) return Int32Type.instance.decompose((Integer) o); if (o instanceof Long) return LongType.instance.decompose((Long) o); if (o instanceof Float) return FloatType.instance.decompose((Float) o); if (o instanceof Double) return DoubleType.instance.decompose((Double) o); if (o instanceof UUID) return ByteBuffer.wrap(UUIDGen.decompose((UUID) o)); if (o instanceof Tuple) { List<Object> objects = ((Tuple) o).getAll(); //collections if (objects.size() > 0 && objects.get(0) instanceof String) { String collectionType = (String) objects.get(0); if ("set".equalsIgnoreCase(collectionType) || "list".equalsIgnoreCase(collectionType)) return objToListOrSetBB(objects.subList(1, objects.size())); else if ("map".equalsIgnoreCase(collectionType)) return objToMapBB(objects.subList(1, objects.size())); } return objToCompositeBB(objects); } return ByteBuffer.wrap(((DataByteArray) o).get()); } private ByteBuffer objToListOrSetBB(List<Object> objects) { List<ByteBuffer> serialized = new ArrayList<ByteBuffer>(objects.size()); for (Object sub : objects) { ByteBuffer buffer = objToBB(sub); serialized.add(buffer); } // NOTE: using protocol v1 serialization format for collections so as to not break // compatibility. Not sure if that's the right thing. return CollectionSerializer.pack(serialized, objects.size(), 1); } private ByteBuffer objToMapBB(List<Object> objects) { List<ByteBuffer> serialized = new ArrayList<ByteBuffer>(objects.size() * 2); for (Object sub : objects) { List<Object> keyValue = ((Tuple) sub).getAll(); for (Object entry : keyValue) { ByteBuffer buffer = objToBB(entry); serialized.add(buffer); } } // NOTE: using protocol v1 serialization format for collections so as to not break // compatibility. Not sure if that's the right thing. 
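// At this point 'serialized' holds the flattened entries [key1, value1, key2, value2, ...];
// pack() writes the element count followed by each buffer and its length using the protocol v1
// collection encoding, where objects.size() is the number of map entries.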
return CollectionSerializer.pack(serialized, objects.size(), 1); } private ByteBuffer objToCompositeBB(List<Object> objects) { List<ByteBuffer> serialized = new ArrayList<ByteBuffer>(objects.size()); int totalLength = 0; for (Object sub : objects) { ByteBuffer buffer = objToBB(sub); serialized.add(buffer); totalLength += 2 + buffer.remaining() + 1; } ByteBuffer out = ByteBuffer.allocate(totalLength); for (ByteBuffer bb : serialized) { int length = bb.remaining(); out.put((byte) ((length >> 8) & 0xFF)); out.put((byte) (length & 0xFF)); out.put(bb); out.put((byte) 0); } out.flip(); return out; } /** write tuple data to cassandra */ private void writeColumnsFromTuple(ByteBuffer key, Tuple t, int offset) throws IOException { ArrayList<Mutation> mutationList = new ArrayList<Mutation>(); for (int i = offset; i < t.size(); i++) { if (t.getType(i) == DataType.BAG) writeColumnsFromBag(key, (DataBag) t.get(i)); else if (t.getType(i) == DataType.TUPLE) { Tuple inner = (Tuple) t.get(i); if (inner.size() > 0) // may be empty, for an indexed column that wasn't present mutationList.add(mutationFromTuple(inner)); } else if (!usePartitionFilter) { throw new IOException("Output type was not a bag or a tuple"); } } if (mutationList.size() > 0) writeMutations(key, mutationList); } /** compose Cassandra mutation from tuple */ private Mutation mutationFromTuple(Tuple t) throws IOException { Mutation mutation = new Mutation(); if (t.get(1) == null) { if (allow_deletes) { mutation.deletion = new Deletion(); mutation.deletion.predicate = new org.apache.cassandra.thrift.SlicePredicate(); mutation.deletion.predicate.column_names = Arrays.asList(objToBB(t.get(0))); mutation.deletion.setTimestamp(FBUtilities.timestampMicros()); } else throw new IOException("null found but deletes are disabled, set " + PIG_ALLOW_DELETES + "=true in environment or allow_deletes=true in URL to enable"); } else { org.apache.cassandra.thrift.Column column = new org.apache.cassandra.thrift.Column(); column.setName(objToBB(t.get(0))); column.setValue(objToBB(t.get(1))); column.setTimestamp(FBUtilities.timestampMicros()); mutation.column_or_supercolumn = new ColumnOrSuperColumn(); mutation.column_or_supercolumn.column = column; } return mutation; } /** write bag data to Cassandra */ private void writeColumnsFromBag(ByteBuffer key, DataBag bag) throws IOException { List<Mutation> mutationList = new ArrayList<Mutation>(); for (Tuple pair : bag) { Mutation mutation = new Mutation(); if (DataType.findType(pair.get(1)) == DataType.BAG) // supercolumn { SuperColumn sc = new SuperColumn(); sc.setName(objToBB(pair.get(0))); List<org.apache.cassandra.thrift.Column> columns = new ArrayList<org.apache.cassandra.thrift.Column>(); for (Tuple subcol : (DataBag) pair.get(1)) { org.apache.cassandra.thrift.Column column = new org.apache.cassandra.thrift.Column(); column.setName(objToBB(subcol.get(0))); column.setValue(objToBB(subcol.get(1))); column.setTimestamp(FBUtilities.timestampMicros()); columns.add(column); } if (columns.isEmpty()) { if (allow_deletes) { mutation.deletion = new Deletion(); mutation.deletion.super_column = objToBB(pair.get(0)); mutation.deletion.setTimestamp(FBUtilities.timestampMicros()); } else throw new IOException( "SuperColumn deletion attempted with empty bag, but deletes are disabled, set " + PIG_ALLOW_DELETES + "=true in environment or allow_deletes=true in URL to enable"); } else { sc.columns = columns; mutation.column_or_supercolumn = new ColumnOrSuperColumn(); mutation.column_or_supercolumn.super_column = sc; } } else 
mutation = mutationFromTuple(pair); mutationList.add(mutation); // for wide rows, we need to limit the amount of mutations we write at once if (mutationList.size() >= 10) // arbitrary, CFOF will re-batch this up, and BOF won't care { writeMutations(key, mutationList); mutationList.clear(); } } // write the last batch if (mutationList.size() > 0) writeMutations(key, mutationList); } /** write mutation to Cassandra */ private void writeMutations(ByteBuffer key, List<Mutation> mutations) throws IOException { try { writer.write(key, mutations); } catch (InterruptedException e) { throw new IOException(e); } } /** get a list of columns with defined index*/ protected List<ColumnDef> getIndexes() throws IOException { CfDef cfdef = getCfDef(loadSignature); List<ColumnDef> indexes = new ArrayList<ColumnDef>(); for (ColumnDef cdef : cfdef.column_metadata) { if (cdef.index_type != null) indexes.add(cdef); } return indexes; } /** get a list of Cassandra IndexExpression from Pig expression */ private List<IndexExpression> filterToIndexExpressions(Expression expression) throws IOException { List<IndexExpression> indexExpressions = new ArrayList<IndexExpression>(); Expression.BinaryExpression be = (Expression.BinaryExpression) expression; ByteBuffer name = ByteBuffer.wrap(be.getLhs().toString().getBytes()); ByteBuffer value = ByteBuffer.wrap(be.getRhs().toString().getBytes()); switch (expression.getOpType()) { case OP_EQ: indexExpressions.add(new IndexExpression(name, IndexOperator.EQ, value)); break; case OP_GE: indexExpressions.add(new IndexExpression(name, IndexOperator.GTE, value)); break; case OP_GT: indexExpressions.add(new IndexExpression(name, IndexOperator.GT, value)); break; case OP_LE: indexExpressions.add(new IndexExpression(name, IndexOperator.LTE, value)); break; case OP_LT: indexExpressions.add(new IndexExpression(name, IndexOperator.LT, value)); break; case OP_AND: indexExpressions.addAll(filterToIndexExpressions(be.getLhs())); indexExpressions.addAll(filterToIndexExpressions(be.getRhs())); break; default: throw new IOException("Unsupported expression type: " + expression.getOpType().name()); } return indexExpressions; } /** convert a list of index expression to string */ private static String indexExpressionsToString(List<IndexExpression> indexExpressions) throws IOException { assert indexExpressions != null; // oh, you thought cfdefToString was awful? 
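// The filter is stored by wrapping the expressions in a Thrift IndexClause, serializing it with
// TBinaryProtocol and hex-encoding the bytes, so it fits in a String-valued UDFContext property;
// indexExpressionsFromString() below reverses the process.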
IndexClause indexClause = new IndexClause(); indexClause.setExpressions(indexExpressions); indexClause.setStart_key("".getBytes()); TSerializer serializer = new TSerializer(new TBinaryProtocol.Factory()); try { return Hex.bytesToHex(serializer.serialize(indexClause)); } catch (TException e) { throw new IOException(e); } } /** convert string to a list of index expression */ private static List<IndexExpression> indexExpressionsFromString(String ie) throws IOException { assert ie != null; TDeserializer deserializer = new TDeserializer(new TBinaryProtocol.Factory()); IndexClause indexClause = new IndexClause(); try { deserializer.deserialize(indexClause, Hex.hexToBytes(ie)); } catch (TException e) { throw new IOException(e); } return indexClause.getExpressions(); } public ResourceStatistics getStatistics(String location, Job job) { return null; } public void cleanupOnFailure(String failure, Job job) { } public void cleanupOnSuccess(String location, Job job) throws IOException { } /** StoreFunc methods */ public void setStoreFuncUDFContextSignature(String signature) { this.storeSignature = signature; } public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException { return relativeToAbsolutePath(location, curDir); } /** output format */ public OutputFormat getOutputFormat() throws IOException { try { return FBUtilities.construct(outputFormatClass, "outputformat"); } catch (ConfigurationException e) { throw new IOException(e); } } @Override public InputFormat getInputFormat() throws IOException { try { return FBUtilities.construct(inputFormatClass, "inputformat"); } catch (ConfigurationException e) { throw new IOException(e); } } /** get a list of index expression */ private List<IndexExpression> getIndexExpressions() throws IOException { UDFContext context = UDFContext.getUDFContext(); Properties property = context.getUDFProperties(CassandraStorage.class); if (property.getProperty(StorageHelper.PARTITION_FILTER_SIGNATURE) != null) return indexExpressionsFromString(property.getProperty(StorageHelper.PARTITION_FILTER_SIGNATURE)); else return null; } /** get a list of column for the column family */ protected List<ColumnDef> getColumnMetadata(Cassandra.Client client) throws TException, CharacterCodingException, InvalidRequestException, ConfigurationException { return getColumnMeta(client, true, true); } /** get column meta data */ protected List<ColumnDef> getColumnMeta(Cassandra.Client client, boolean cassandraStorage, boolean includeCompactValueColumn) throws org.apache.cassandra.thrift.InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException, CharacterCodingException, org.apache.cassandra.exceptions.InvalidRequestException, ConfigurationException, NotFoundException { String query = String.format( "SELECT column_name, validator, index_type, type " + "FROM %s.%s " + "WHERE keyspace_name = '%s' AND columnfamily_name = '%s'", SystemKeyspace.NAME, LegacySchemaTables.COLUMNS, keyspace, column_family); CqlResult result = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); List<CqlRow> rows = result.rows; List<ColumnDef> columnDefs = new ArrayList<ColumnDef>(); if (rows == null || rows.isEmpty()) { // if CassandraStorage, just return the empty list if (cassandraStorage) return columnDefs; // otherwise for CqlNativeStorage, check metadata for classic thrift tables CFMetaData cfm = getCFMetaData(keyspace, column_family, client); for (ColumnDefinition def : 
cfm.regularAndStaticColumns()) { ColumnDef cDef = new ColumnDef(); String columnName = def.name.toString(); String type = def.type.toString(); logger.trace("name: {}, type: {} ", columnName, type); cDef.name = ByteBufferUtil.bytes(columnName); cDef.validation_class = type; columnDefs.add(cDef); } // we may not need to include the value column for compact tables as we // could have already processed it as schema_columnfamilies.value_alias if (columnDefs.size() == 0 && includeCompactValueColumn && cfm.compactValueColumn() != null) { ColumnDefinition def = cfm.compactValueColumn(); if ("value".equals(def.name.toString())) { ColumnDef cDef = new ColumnDef(); cDef.name = def.name.bytes; cDef.validation_class = def.type.toString(); columnDefs.add(cDef); } } return columnDefs; } Iterator<CqlRow> iterator = rows.iterator(); while (iterator.hasNext()) { CqlRow row = iterator.next(); ColumnDef cDef = new ColumnDef(); String type = ByteBufferUtil.string(row.getColumns().get(3).value); if (!type.equals("regular")) continue; cDef.setName(ByteBufferUtil.clone(row.getColumns().get(0).value)); cDef.validation_class = ByteBufferUtil.string(row.getColumns().get(1).value); ByteBuffer indexType = row.getColumns().get(2).value; if (indexType != null) cDef.index_type = getIndexType(ByteBufferUtil.string(indexType)); columnDefs.add(cDef); } return columnDefs; } /** get CFMetaData of a column family */ protected CFMetaData getCFMetaData(String ks, String cf, Cassandra.Client client) throws NotFoundException, org.apache.cassandra.thrift.InvalidRequestException, TException, org.apache.cassandra.exceptions.InvalidRequestException, ConfigurationException { KsDef ksDef = client.describe_keyspace(ks); for (CfDef cfDef : ksDef.cf_defs) { if (cfDef.name.equalsIgnoreCase(cf)) return ThriftConversion.fromThrift(cfDef); } return null; } /** get index type from string */ protected IndexType getIndexType(String type) { type = type.toLowerCase(); if ("keys".equals(type)) return IndexType.KEYS; else if ("custom".equals(type)) return IndexType.CUSTOM; else if ("composites".equals(type)) return IndexType.COMPOSITES; else return null; } /** return partition keys */ public String[] getPartitionKeys(String location, Job job) throws IOException { if (!usePartitionFilter) return null; List<ColumnDef> indexes = getIndexes(); String[] partitionKeys = new String[indexes.size()]; for (int i = 0; i < indexes.size(); i++) { partitionKeys[i] = new String(indexes.get(i).getName()); } return partitionKeys; } /** convert key to a tuple */ private Tuple keyToTuple(ByteBuffer key, CfDef cfDef, AbstractType comparator) throws IOException { Tuple tuple = TupleFactory.getInstance().newTuple(1); addKeyToTuple(tuple, key, cfDef, comparator); return tuple; } /** add key to a tuple */ private void addKeyToTuple(Tuple tuple, ByteBuffer key, CfDef cfDef, AbstractType comparator) throws IOException { if (comparator instanceof AbstractCompositeType) { StorageHelper.setTupleValue(tuple, 0, composeComposite((AbstractCompositeType) comparator, key)); } else { StorageHelper.setTupleValue(tuple, 0, StorageHelper.cassandraToObj( getDefaultMarshallers(cfDef).get(MarshallerType.KEY_VALIDATOR), key, nativeProtocolVersion)); } } /** Deconstructs a composite type to a Tuple. 
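 * Each component of the composite value becomes one field of the returned tuple, converted to a
 * Pig-friendly object via StorageHelper.cassandraToObj with the component's own comparator.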
*/ protected Tuple composeComposite(AbstractCompositeType comparator, ByteBuffer name) throws IOException { List<AbstractCompositeType.CompositeComponent> result = comparator.deconstruct(name); Tuple t = TupleFactory.getInstance().newTuple(result.size()); for (int i = 0; i < result.size(); i++) StorageHelper.setTupleValue(t, i, StorageHelper.cassandraToObj(result.get(i).comparator, result.get(i).value, nativeProtocolVersion)); return t; } /** cassandra://[username:password@]<keyspace>/<columnfamily>[?slice_start=<start>&slice_end=<end> * [&reversed=true][&limit=1][&allow_deletes=true][&widerows=true] * [&use_secondary=true][&comparator=<comparator>][&partitioner=<partitioner>]]*/ private void setLocationFromUri(String location) throws IOException { try { if (!location.startsWith("cassandra://")) throw new Exception("Bad scheme." + location); String[] urlParts = location.split("\\?"); if (urlParts.length > 1) { Map<String, String> urlQuery = getQueryMap(urlParts[1]); AbstractType comparator = BytesType.instance; if (urlQuery.containsKey("comparator")) comparator = TypeParser.parse(urlQuery.get("comparator")); if (urlQuery.containsKey("slice_start")) slice_start = comparator.fromString(urlQuery.get("slice_start")); if (urlQuery.containsKey("slice_end")) slice_end = comparator.fromString(urlQuery.get("slice_end")); if (urlQuery.containsKey("reversed")) slice_reverse = Boolean.parseBoolean(urlQuery.get("reversed")); if (urlQuery.containsKey("limit")) limit = Integer.parseInt(urlQuery.get("limit")); if (urlQuery.containsKey("allow_deletes")) allow_deletes = Boolean.parseBoolean(urlQuery.get("allow_deletes")); if (urlQuery.containsKey("widerows")) widerows = Boolean.parseBoolean(urlQuery.get("widerows")); if (urlQuery.containsKey("use_secondary")) usePartitionFilter = Boolean.parseBoolean(urlQuery.get("use_secondary")); if (urlQuery.containsKey("split_size")) splitSize = Integer.parseInt(urlQuery.get("split_size")); if (urlQuery.containsKey("partitioner")) partitionerClass = urlQuery.get("partitioner"); if (urlQuery.containsKey("init_address")) initHostAddress = urlQuery.get("init_address"); if (urlQuery.containsKey("rpc_port")) rpcPort = urlQuery.get("rpc_port"); } String[] parts = urlParts[0].split("/+"); String[] credentialsAndKeyspace = parts[1].split("@"); if (credentialsAndKeyspace.length > 1) { String[] credentials = credentialsAndKeyspace[0].split(":"); username = credentials[0]; password = credentials[1]; keyspace = credentialsAndKeyspace[1]; } else { keyspace = parts[1]; } column_family = parts[2]; } catch (Exception e) { throw new IOException("Expected 'cassandra://[username:password@]<keyspace>/<table>" + "[?slice_start=<start>&slice_end=<end>[&reversed=true][&limit=1]" + "[&allow_deletes=true][&widerows=true][&use_secondary=true]" + "[&comparator=<comparator>][&split_size=<size>][&partitioner=<partitioner>]" + "[&init_address=<host>][&rpc_port=<port>]]': " + e.getMessage()); } } /** decompose the query to store the parameters in a map */ public static Map<String, String> getQueryMap(String query) throws UnsupportedEncodingException { String[] params = query.split("&"); Map<String, String> map = new HashMap<String, String>(params.length); for (String param : params) { String[] keyValue = param.split("="); map.put(keyValue[0], URLDecoder.decode(keyValue[1], "UTF-8")); } return map; } public ByteBuffer nullToBB() { return null; } /** return the CfInfo for the column family */ protected CfDef getCfDef(Cassandra.Client client) throws org.apache.cassandra.thrift.InvalidRequestException, 
UnavailableException, TimedOutException, SchemaDisagreementException, TException, NotFoundException, org.apache.cassandra.exceptions.InvalidRequestException, ConfigurationException, IOException { // get CF meta data String query = String.format( "SELECT type, comparator, subcomparator, default_validator, key_validator " + "FROM %s.%s " + "WHERE keyspace_name = '%s' AND columnfamily_name = '%s'", SystemKeyspace.NAME, LegacySchemaTables.COLUMNFAMILIES, keyspace, column_family); CqlResult result = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); if (result == null || result.rows == null || result.rows.isEmpty()) return null; Iterator<CqlRow> iteraRow = result.rows.iterator(); CfDef cfDef = new CfDef(); cfDef.keyspace = keyspace; cfDef.name = column_family; if (iteraRow.hasNext()) { CqlRow cqlRow = iteraRow.next(); cfDef.column_type = ByteBufferUtil.string(cqlRow.columns.get(0).value); cfDef.comparator_type = ByteBufferUtil.string(cqlRow.columns.get(1).value); ByteBuffer subComparator = cqlRow.columns.get(2).value; if (subComparator != null) cfDef.subcomparator_type = ByteBufferUtil.string(subComparator); cfDef.default_validation_class = ByteBufferUtil.string(cqlRow.columns.get(3).value); cfDef.key_validation_class = ByteBufferUtil.string(cqlRow.columns.get(4).value); } cfDef.column_metadata = getColumnMetadata(client); return cfDef; } /** get the columnfamily definition for the signature */ protected CfDef getCfDef(String signature) throws IOException { UDFContext context = UDFContext.getUDFContext(); Properties property = context.getUDFProperties(CassandraStorage.class); String prop = property.getProperty(signature); return cfdefFromString(prop); } /** convert string back to CfDef */ protected static CfDef cfdefFromString(String st) throws IOException { assert st != null; TDeserializer deserializer = new TDeserializer(new TBinaryProtocol.Factory()); CfDef cfDef = new CfDef(); try { deserializer.deserialize(cfDef, Hex.hexToBytes(st)); } catch (TException e) { throw new IOException(e); } return cfDef; } /** convert CfDef to string */ protected static String cfdefToString(CfDef cfDef) throws IOException { assert cfDef != null; // this is so awful it's kind of cool! 
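// Same trick as indexExpressionsToString(): Thrift-serialize the CfDef and hex-encode the bytes so
// the table definition can round-trip through UDFContext string properties (see cfdefFromString).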
TSerializer serializer = new TSerializer(new TBinaryProtocol.Factory()); try { return Hex.bytesToHex(serializer.serialize(cfDef)); } catch (TException e) { throw new IOException(e); } } /** parse the string to a cassandra data type */ protected AbstractType parseType(String type) throws IOException { try { // always treat counters like longs, specifically CCT.compose is not what we need if (type != null && type.equals("org.apache.cassandra.db.marshal.CounterColumnType")) return LongType.instance; return TypeParser.parse(type); } catch (ConfigurationException e) { throw new IOException(e); } catch (SyntaxException e) { throw new IOException(e); } } /** convert a column to a tuple */ protected Tuple columnToTuple(ColumnFamilyRecordReader.Column column, CfDef cfDef, AbstractType comparator) throws IOException { Tuple pair = TupleFactory.getInstance().newTuple(2); // name if (comparator instanceof AbstractCompositeType) StorageHelper.setTupleValue(pair, 0, composeComposite((AbstractCompositeType) comparator, column.name)); else StorageHelper.setTupleValue(pair, 0, StorageHelper.cassandraToObj(comparator, column.name, nativeProtocolVersion)); // value Map<ByteBuffer, AbstractType> validators = getValidatorMap(cfDef); if (validators.get(column.name) == null) { Map<MarshallerType, AbstractType> marshallers = getDefaultMarshallers(cfDef); StorageHelper.setTupleValue(pair, 1, StorageHelper.cassandraToObj( marshallers.get(MarshallerType.DEFAULT_VALIDATOR), column.value, nativeProtocolVersion)); } else StorageHelper.setTupleValue(pair, 1, StorageHelper.cassandraToObj(validators.get(column.name), column.value, nativeProtocolVersion)); return pair; } /** construct a map to store the mashaller type to cassandra data type mapping */ protected Map<MarshallerType, AbstractType> getDefaultMarshallers(CfDef cfDef) throws IOException { Map<MarshallerType, AbstractType> marshallers = new EnumMap<MarshallerType, AbstractType>( MarshallerType.class); AbstractType comparator; AbstractType subcomparator; AbstractType default_validator; AbstractType key_validator; comparator = parseType(cfDef.getComparator_type()); subcomparator = parseType(cfDef.getSubcomparator_type()); default_validator = parseType(cfDef.getDefault_validation_class()); key_validator = parseType(cfDef.getKey_validation_class()); marshallers.put(MarshallerType.COMPARATOR, comparator); marshallers.put(MarshallerType.DEFAULT_VALIDATOR, default_validator); marshallers.put(MarshallerType.KEY_VALIDATOR, key_validator); marshallers.put(MarshallerType.SUBCOMPARATOR, subcomparator); return marshallers; } /** get the validators */ protected Map<ByteBuffer, AbstractType> getValidatorMap(CfDef cfDef) throws IOException { Map<ByteBuffer, AbstractType> validators = new HashMap<ByteBuffer, AbstractType>(); for (ColumnDef cd : cfDef.getColumn_metadata()) { if (cd.getValidation_class() != null && !cd.getValidation_class().isEmpty()) { AbstractType validator = null; try { validator = TypeParser.parse(cd.getValidation_class()); if (validator instanceof CounterColumnType) validator = LongType.instance; validators.put(cd.name, validator); } catch (ConfigurationException e) { throw new IOException(e); } catch (SyntaxException e) { throw new IOException(e); } } } return validators; } }
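In a Pig script this class is typically referenced through its URL scheme, for example
rows = LOAD 'cassandra://MyKeyspace/MyColumnFamily' USING org.apache.cassandra.hadoop.pig.CassandraStorage();
where the keyspace and table names are placeholders (see setLocationFromUri for the full syntax).
For reference, the byte layout that objToCompositeBB produces, and composeComposite consumes, is:
for each component, a two-byte big-endian length, the component bytes, and a trailing
end-of-component byte (0). Below is a minimal standalone sketch of that encoding; it is not part of
the class above, and the class name and sample component values are made up for illustration.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

// Illustrative only: mirrors the layout written by CassandraStorage.objToCompositeBB.
public class CompositeEncodingSketch
{
    public static ByteBuffer encode(String... components)
    {
        int total = 0;
        for (String c : components)
            total += 2 + c.getBytes(StandardCharsets.UTF_8).length + 1;

        ByteBuffer out = ByteBuffer.allocate(total);
        for (String c : components)
        {
            byte[] bytes = c.getBytes(StandardCharsets.UTF_8);
            out.putShort((short) bytes.length); // two-byte big-endian component length
            out.put(bytes);                     // component value
            out.put((byte) 0);                  // end-of-component byte
        }
        out.flip();
        return out;
    }

    public static void main(String[] args)
    {
        ByteBuffer bb = encode("2014", "temperature"); // hypothetical composite column name
        System.out.println("encoded composite is " + bb.remaining() + " bytes");
    }
}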