org.apache.cassandra.hadoop2.pig.AbstractCassandraStorage.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.cassandra.hadoop2.pig.AbstractCassandraStorage.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.hadoop2.pig;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.net.URLDecoder;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.util.*;

import org.apache.cassandra.exceptions.ConfigurationException;
import org.apache.cassandra.exceptions.SyntaxException;
import org.apache.cassandra.auth.IAuthenticator;
import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.cql3.CFDefinition;
import org.apache.cassandra.cql3.ColumnIdentifier;
import org.apache.cassandra.db.Column;
import org.apache.cassandra.db.IColumn;
import org.apache.cassandra.db.marshal.*;
import org.apache.cassandra.db.marshal.AbstractCompositeType.CompositeComponent;
import org.apache.cassandra.hadoop2.*;
import org.apache.cassandra.thrift.*;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.Hex;
import org.apache.cassandra.utils.UUIDGen;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.*;
import org.apache.pig.*;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.*;
import org.apache.pig.impl.util.UDFContext;
import org.apache.thrift.TDeserializer;
import org.apache.thrift.TException;
import org.apache.thrift.TSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A LoadStoreFunc for retrieving data from and storing data to Cassandra
 */
public abstract class AbstractCassandraStorage extends LoadFunc implements StoreFuncInterface, LoadMetadata {
    /**
     * Keys of the map built by getDefaultMarshallers: one entry per type definition
     * carried by a CfDef (comparator, default validator, key validator, subcomparator).
     */
    protected enum MarshallerType {
        COMPARATOR, DEFAULT_VALIDATOR, KEY_VALIDATOR, SUBCOMPARATOR
    };

    // system environment variables that can be set to configure connection info:
    // alternatively, Hadoop JobConf variables can be set using keys from ConfigHelper
    // (the *_INPUT_* / *_OUTPUT_* variants are applied after the generic ones in
    // setConnectionInformation, so they take precedence when both are set)
    public final static String PIG_INPUT_RPC_PORT = "PIG_INPUT_RPC_PORT";
    public final static String PIG_INPUT_INITIAL_ADDRESS = "PIG_INPUT_INITIAL_ADDRESS";
    public final static String PIG_INPUT_PARTITIONER = "PIG_INPUT_PARTITIONER";
    public final static String PIG_OUTPUT_RPC_PORT = "PIG_OUTPUT_RPC_PORT";
    public final static String PIG_OUTPUT_INITIAL_ADDRESS = "PIG_OUTPUT_INITIAL_ADDRESS";
    public final static String PIG_OUTPUT_PARTITIONER = "PIG_OUTPUT_PARTITIONER";
    public final static String PIG_RPC_PORT = "PIG_RPC_PORT";
    public final static String PIG_INITIAL_ADDRESS = "PIG_INITIAL_ADDRESS";
    public final static String PIG_PARTITIONER = "PIG_PARTITIONER";
    public final static String PIG_INPUT_FORMAT = "PIG_INPUT_FORMAT";
    public final static String PIG_OUTPUT_FORMAT = "PIG_OUTPUT_FORMAT";
    // not read anywhere in this class; presumably consumed by subclasses -- TODO confirm
    public final static String PIG_INPUT_SPLIT_SIZE = "PIG_INPUT_SPLIT_SIZE";

    // fallback input/output format class names, used by setConnectionInformation when the
    // corresponding PIG_*_FORMAT environment variable is unset; never assigned in this class
    protected String DEFAULT_INPUT_FORMAT;
    protected String DEFAULT_OUTPUT_FORMAT;

    // not referenced in this class; presumably a UDF property key used by subclasses -- TODO confirm
    public final static String PARTITION_FILTER_SIGNATURE = "cassandra.partition.filter";

    protected static final Logger logger = LoggerFactory.getLogger(AbstractCassandraStorage.class);

    // credentials; initSchema logs in only when both are non-null
    protected String username;
    protected String password;
    // keyspace / column family substituted into the schema queries
    protected String keyspace;
    protected String column_family;
    // UDF context signatures recorded by setUDFContextSignature / setStoreFuncUDFContextSignature
    protected String loadSignature;
    protected String storeSignature;

    protected Configuration conf;
    // class names instantiated by getInputFormat / getOutputFormat
    protected String inputFormatClass;
    protected String outputFormatClass;
    protected int splitSize = 64 * 1024;
    protected String partitionerClass;
    // when false, getPartitionKeys returns null
    protected boolean usePartitionFilter = false;

    /** Creates the storage; performs no initialization beyond the superclass constructor. */
    public AbstractCassandraStorage() {
        super();
    }

    /**
     * Splits a composite name into its components and returns them as a Tuple with one
     * field per component, each composed by that component's own comparator.
     */
    protected Tuple composeComposite(AbstractCompositeType comparator, ByteBuffer name) throws IOException {
        List<CompositeComponent> components = comparator.deconstruct(name);
        Tuple tuple = TupleFactory.getInstance().newTuple(components.size());
        int position = 0;
        for (CompositeComponent component : components) {
            setTupleValue(tuple, position, component.comparator.compose(component.value));
            position++;
        }
        return tuple;
    }

    /**
     * Converts a column into a two-field tuple of (name, value).
     * For a standard column the value is composed with the column's own validator, falling
     * back to the column family's default validator; for a super column the value is a bag
     * holding one (name, value) tuple per sub-column.
     */
    protected Tuple columnToTuple(IColumn col, CfDef cfDef, AbstractType comparator) throws IOException {
        Tuple pair = TupleFactory.getInstance().newTuple(2);

        // field 0: the column name, expanded into a nested tuple when the comparator is composite
        Object name = (comparator instanceof AbstractCompositeType)
                ? composeComposite((AbstractCompositeType) comparator, col.name())
                : comparator.compose(col.name());
        setTupleValue(pair, 0, name);

        if (!(col instanceof Column)) {
            // super column: recurse into every sub-column using the sub-comparator
            ArrayList<Tuple> subcols = new ArrayList<Tuple>();
            for (IColumn subcol : col.getSubColumns())
                subcols.add(columnToTuple(subcol, cfDef, parseType(cfDef.getSubcomparator_type())));

            pair.set(1, new DefaultDataBag(subcols));
            return pair;
        }

        // standard column: per-column validator first, default validator otherwise
        AbstractType validator = getValidatorMap(cfDef).get(col.name());
        if (validator == null)
            validator = getDefaultMarshallers(cfDef).get(MarshallerType.DEFAULT_VALIDATOR);
        setTupleValue(pair, 1, validator.compose(col.value()));
        return pair;
    }

    /**
     * Stores a value at the given tuple position after converting Cassandra-side types:
     * BigInteger becomes an int, ByteBuffer and UUID become DataByteArray, Date becomes the
     * long produced by DateType; anything else is stored unchanged.
     */
    protected void setTupleValue(Tuple pair, int position, Object value) throws ExecException {
        Object converted;
        if (value instanceof BigInteger)
            converted = ((BigInteger) value).intValue();
        else if (value instanceof ByteBuffer)
            converted = new DataByteArray(ByteBufferUtil.getArray((ByteBuffer) value));
        else if (value instanceof UUID)
            converted = new DataByteArray(UUIDGen.decompose((java.util.UUID) value));
        else if (value instanceof Date)
            converted = DateType.instance.decompose((Date) value).getLong();
        else
            converted = value;

        pair.set(position, converted);
    }

    /** Looks up the serialized column family definition stored under the signature and deserializes it. */
    protected CfDef getCfDef(String signature) {
        Properties udfProperties = UDFContext.getUDFContext().getUDFProperties(AbstractCassandraStorage.class);
        String serialized = udfProperties.getProperty(signature);
        return cfdefFromString(serialized);
    }

    /**
     * Builds the map from marshaller type to Cassandra data type for a column family,
     * parsing the comparator, subcomparator, default validator and key validator of the CfDef.
     */
    protected Map<MarshallerType, AbstractType> getDefaultMarshallers(CfDef cfDef) throws IOException {
        // parse everything first, in the same order as before, so a bad type fails before the map is filled
        AbstractType comparator = parseType(cfDef.getComparator_type());
        AbstractType subcomparator = parseType(cfDef.getSubcomparator_type());
        AbstractType defaultValidator = parseType(cfDef.getDefault_validation_class());
        AbstractType keyValidator = parseType(cfDef.getKey_validation_class());

        Map<MarshallerType, AbstractType> byType = new EnumMap<MarshallerType, AbstractType>(
                MarshallerType.class);
        byType.put(MarshallerType.COMPARATOR, comparator);
        byType.put(MarshallerType.DEFAULT_VALIDATOR, defaultValidator);
        byType.put(MarshallerType.KEY_VALIDATOR, keyValidator);
        byType.put(MarshallerType.SUBCOMPARATOR, subcomparator);
        return byType;
    }

    /**
     * Returns the per-column validators declared in the column metadata, keyed by column name.
     * Columns without a validation class are skipped, and counter columns are mapped to LongType.
     */
    protected Map<ByteBuffer, AbstractType> getValidatorMap(CfDef cfDef) throws IOException {
        Map<ByteBuffer, AbstractType> byColumnName = new HashMap<ByteBuffer, AbstractType>();
        for (ColumnDef cd : cfDef.getColumn_metadata()) {
            String validationClass = cd.getValidation_class();
            if (validationClass == null || validationClass.isEmpty())
                continue;

            try {
                AbstractType parsed = TypeParser.parse(validationClass);
                byColumnName.put(cd.name, parsed instanceof CounterColumnType ? LongType.instance : parsed);
            } catch (ConfigurationException e) {
                throw new IOException(e);
            } catch (SyntaxException e) {
                throw new IOException(e);
            }
        }
        return byColumnName;
    }

    /**
     * Parses a type string into a Cassandra data type, wrapping parser failures in IOException.
     * The fully qualified CounterColumnType name is always mapped to LongType.
     */
    protected AbstractType parseType(String type) throws IOException {
        // always treat counters like longs, specifically CCT.compose is not what we need
        if ("org.apache.cassandra.db.marshal.CounterColumnType".equals(type))
            return LongType.instance;

        try {
            return TypeParser.parse(type);
        } catch (ConfigurationException e) {
            throw new IOException(e);
        } catch (SyntaxException e) {
            throw new IOException(e);
        }
    }

    /** Instantiates the configured input format class; configuration failures surface as RuntimeException. */
    @Override
    public InputFormat getInputFormat() {
        InputFormat format;
        try {
            format = FBUtilities.construct(inputFormatClass, "inputformat");
        } catch (ConfigurationException e) {
            throw new RuntimeException(e);
        }
        return format;
    }

    /**
     * Decomposes a URL query string ({@code k1=v1&k2=v2}) into a map of parameters.
     * Values are URL-decoded as UTF-8; keys are stored as written.
     * Each parameter is split on its first '=' only, so a value may itself contain '='.
     * A parameter with no '=' or with nothing after it maps to the empty string, and
     * empty parameters (as produced by an empty query or a doubled '&') are skipped.
     *
     * @param query the raw query string, without the leading '?'
     * @return a mutable map from parameter name to decoded value
     * @throws UnsupportedEncodingException declared by URLDecoder.decode
     */
    public static Map<String, String> getQueryMap(String query) throws UnsupportedEncodingException {
        Map<String, String> map = new HashMap<String, String>();
        for (String param : query.split("&")) {
            if (param.isEmpty())
                continue;

            // split on the first '=' only so that values containing '=' stay intact
            int separator = param.indexOf('=');
            String key = separator < 0 ? param : param.substring(0, separator);
            String rawValue = separator < 0 ? "" : param.substring(separator + 1);
            map.put(key, URLDecoder.decode(rawValue, "UTF-8"));
        }
        return map;
    }

    /**
     * Copies connection settings from environment variables into the Hadoop configuration.
     * The generic variable (e.g. PIG_RPC_PORT) is applied to both input and output first and
     * the input/output specific variables are applied afterwards. The input and output format
     * class names fall back to DEFAULT_INPUT_FORMAT / DEFAULT_OUTPUT_FORMAT when unset.
     */
    protected void setConnectionInformation() throws IOException {
        // rpc port
        String rpcPort = System.getenv(PIG_RPC_PORT);
        if (rpcPort != null) {
            ConfigHelper.setInputRpcPort(conf, rpcPort);
            ConfigHelper.setOutputRpcPort(conf, rpcPort);
        }
        String inputRpcPort = System.getenv(PIG_INPUT_RPC_PORT);
        if (inputRpcPort != null)
            ConfigHelper.setInputRpcPort(conf, inputRpcPort);
        String outputRpcPort = System.getenv(PIG_OUTPUT_RPC_PORT);
        if (outputRpcPort != null)
            ConfigHelper.setOutputRpcPort(conf, outputRpcPort);

        // initial address
        String initialAddress = System.getenv(PIG_INITIAL_ADDRESS);
        if (initialAddress != null) {
            ConfigHelper.setInputInitialAddress(conf, initialAddress);
            ConfigHelper.setOutputInitialAddress(conf, initialAddress);
        }
        String inputInitialAddress = System.getenv(PIG_INPUT_INITIAL_ADDRESS);
        if (inputInitialAddress != null)
            ConfigHelper.setInputInitialAddress(conf, inputInitialAddress);
        String outputInitialAddress = System.getenv(PIG_OUTPUT_INITIAL_ADDRESS);
        if (outputInitialAddress != null)
            ConfigHelper.setOutputInitialAddress(conf, outputInitialAddress);

        // partitioner
        String partitioner = System.getenv(PIG_PARTITIONER);
        if (partitioner != null) {
            ConfigHelper.setInputPartitioner(conf, partitioner);
            ConfigHelper.setOutputPartitioner(conf, partitioner);
        }
        String inputPartitioner = System.getenv(PIG_INPUT_PARTITIONER);
        if (inputPartitioner != null)
            ConfigHelper.setInputPartitioner(conf, inputPartitioner);
        String outputPartitioner = System.getenv(PIG_OUTPUT_PARTITIONER);
        if (outputPartitioner != null)
            ConfigHelper.setOutputPartitioner(conf, outputPartitioner);

        // input / output format classes
        String inputFormat = System.getenv(PIG_INPUT_FORMAT);
        inputFormatClass = (inputFormat != null) ? getFullyQualifiedClassName(inputFormat) : DEFAULT_INPUT_FORMAT;
        String outputFormat = System.getenv(PIG_OUTPUT_FORMAT);
        outputFormatClass = (outputFormat != null) ? getFullyQualifiedClassName(outputFormat)
                : DEFAULT_OUTPUT_FORMAT;
    }

    /**
     * Returns the class name unchanged when it already contains a package separator,
     * otherwise prefixes it with the org.apache.cassandra.hadoop2 package.
     */
    protected String getFullyQualifiedClassName(String classname) {
        if (classname.contains("."))
            return classname;
        return "org.apache.cassandra.hadoop2." + classname;
    }

    /** Maps a Cassandra data type to the Pig DataType code used for it; unknown types map to BYTEARRAY. */
    protected byte getPigType(AbstractType type) {
        // DateType is bad and it should feel bad
        if (type instanceof LongType || type instanceof DateType)
            return DataType.LONG;

        // IntegerType will overflow at 2**31, but is kept for compatibility until pig has a BigInteger
        if (type instanceof IntegerType || type instanceof Int32Type)
            return DataType.INTEGER;

        if (type instanceof AsciiType || type instanceof UTF8Type)
            return DataType.CHARARRAY;

        if (type instanceof FloatType)
            return DataType.FLOAT;

        if (type instanceof DoubleType)
            return DataType.DOUBLE;

        if (type instanceof AbstractCompositeType || type instanceof CollectionType)
            return DataType.TUPLE;

        return DataType.BYTEARRAY;
    }

    /** Provides no statistics: always returns null regardless of location or job. */
    public ResourceStatistics getStatistics(String location, Job job) {
        return null;
    }

    /** Returns the location unchanged; curDir is ignored. */
    @Override
    public String relativeToAbsolutePath(String location, Path curDir) throws IOException {
        return location;
    }

    /** Records the signature used on the load side (read back by getIndexes). */
    @Override
    public void setUDFContextSignature(String signature) {
        this.loadSignature = signature;
    }

    /* StoreFunc methods */

    /** Records the signature used on the store side. */
    public void setStoreFuncUDFContextSignature(String signature) {
        this.storeSignature = signature;
    }

    /** Store-side counterpart of relativeToAbsolutePath; delegates to it. */
    public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException {
        return relativeToAbsolutePath(location, curDir);
    }

    /** Instantiates the configured output format class; configuration failures surface as RuntimeException. */
    public OutputFormat getOutputFormat() {
        OutputFormat format;
        try {
            format = FBUtilities.construct(outputFormatClass, "outputformat");
        } catch (ConfigurationException e) {
            throw new RuntimeException(e);
        }
        return format;
    }

    /** Accepts any schema: no validation is performed. */
    public void checkSchema(ResourceSchema schema) throws IOException {
        // we don't care about types, they all get casted to ByteBuffers
    }

    /**
     * Serializes a Pig value into a ByteBuffer.
     * Null stays null; String, Integer, Long, Float, Double and UUID use their matching
     * encoders. A Tuple whose first field is the string "set", "list" or "map" (any case)
     * is packed as a collection of the remaining fields; any other Tuple is packed as a
     * composite. Everything else is cast to DataByteArray and wrapped.
     */
    protected ByteBuffer objToBB(Object o) {
        if (o == null)
            return null;
        if (o instanceof java.lang.String)
            return ByteBuffer.wrap(new DataByteArray((String) o).get());
        if (o instanceof Integer)
            return Int32Type.instance.decompose((Integer) o);
        if (o instanceof Long)
            return LongType.instance.decompose((Long) o);
        if (o instanceof Float)
            return FloatType.instance.decompose((Float) o);
        if (o instanceof Double)
            return DoubleType.instance.decompose((Double) o);
        if (o instanceof UUID)
            return ByteBuffer.wrap(UUIDGen.decompose((UUID) o));

        if (!(o instanceof Tuple))
            return ByteBuffer.wrap(((DataByteArray) o).get());

        List<Object> fields = ((Tuple) o).getAll();
        // collections are tagged by a leading string naming the collection kind
        boolean startsWithString = !fields.isEmpty() && fields.get(0) instanceof String;
        if (startsWithString) {
            String tag = (String) fields.get(0);
            boolean listOrSet = "set".equalsIgnoreCase(tag) || "list".equalsIgnoreCase(tag);
            if (listOrSet)
                return objToListOrSetBB(fields.subList(1, fields.size()));
            if ("map".equalsIgnoreCase(tag))
                return objToMapBB(fields.subList(1, fields.size()));
        }
        return objToCompositeBB(fields);
    }

    /** Serializes every element with objToBB and packs them as a collection of objects.size() elements. */
    private ByteBuffer objToListOrSetBB(List<Object> objects) {
        int elementCount = objects.size();
        List<ByteBuffer> elements = new ArrayList<ByteBuffer>(elementCount);
        for (Object element : objects)
            elements.add(objToBB(element));
        return CollectionType.pack(elements, objects.size());
    }

    /**
     * Serializes a list of entry tuples: every field of every entry tuple is serialized in
     * order, and the result is packed with objects.size() as the element count.
     */
    private ByteBuffer objToMapBB(List<Object> objects) {
        List<ByteBuffer> flattened = new ArrayList<ByteBuffer>(objects.size() * 2);
        for (Object entryTuple : objects) {
            List<Object> entryFields = ((Tuple) entryTuple).getAll();
            for (Object field : entryFields)
                flattened.add(objToBB(field));
        }
        return CollectionType.pack(flattened, objects.size());
    }

    /**
     * Serializes each object with objToBB and concatenates the results, writing for every
     * component a two-byte big-endian length, the component bytes, and a trailing zero byte.
     */
    private ByteBuffer objToCompositeBB(List<Object> objects) {
        List<ByteBuffer> parts = new ArrayList<ByteBuffer>(objects.size());
        int capacity = 0;
        for (Object component : objects) {
            ByteBuffer part = objToBB(component);
            parts.add(part);
            // 2 bytes of length prefix + payload + 1 trailing byte
            capacity += part.remaining() + 3;
        }

        // a freshly allocated buffer is big-endian, so putShort writes the high byte first
        ByteBuffer composite = ByteBuffer.allocate(capacity);
        for (ByteBuffer part : parts) {
            composite.putShort((short) part.remaining());
            composite.put(part);
            composite.put((byte) 0);
        }
        composite.flip();
        return composite;
    }

    /** Intentionally does nothing. */
    public void cleanupOnFailure(String failure, Job job) {
    }

    /* Methods to get the column family schema from Cassandra */

    /**
     * Fetches the column family definition from Cassandra and caches it, serialized, in the
     * UDF properties under the given signature. Does nothing when the signature is already
     * cached. Logs in first when both username and password are set.
     * The thrift connection opened for the lookup is always closed before returning.
     *
     * @param signature the UDF context signature used as the property key
     * @throws RuntimeException when the column family does not exist or any step of the lookup fails
     */
    protected void initSchema(String signature) {
        Properties properties = UDFContext.getUDFContext().getUDFProperties(AbstractCassandraStorage.class);

        // Only get the schema if we haven't already gotten it
        if (properties.containsKey(signature))
            return;

        Cassandra.Client client = null;
        try {
            client = ConfigHelper.getClientFromInputAddressList(conf);
            client.set_keyspace(keyspace);

            if (username != null && password != null) {
                Map<String, String> credentials = new HashMap<String, String>(2);
                credentials.put(IAuthenticator.USERNAME_KEY, username);
                credentials.put(IAuthenticator.PASSWORD_KEY, password);

                try {
                    client.login(new AuthenticationRequest(credentials));
                } catch (AuthenticationException e) {
                    logger.error("Authentication exception: invalid username and/or password");
                    throw new RuntimeException(e);
                } catch (AuthorizationException e) {
                    throw new AssertionError(e); // never actually throws AuthorizationException.
                }
            }

            // compose the CfDef for the column family
            CfDef cfDef = getCfDef(client);

            if (cfDef != null)
                properties.setProperty(signature, cfdefToString(cfDef));
            else
                throw new RuntimeException(String.format("Column family '%s' not found in keyspace '%s'",
                        column_family, keyspace));
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // the client is local to this call, so release its connection instead of leaking it
            if (client != null) {
                org.apache.thrift.transport.TTransport transport = client.getOutputProtocol().getTransport();
                if (transport.isOpen())
                    transport.close();
            }
        }
    }

    /** Serializes a CfDef with the thrift binary protocol and returns the bytes as a hex string. */
    protected static String cfdefToString(CfDef cfDef) {
        assert cfDef != null;
        // this is so awful it's kind of cool!
        TSerializer serializer = new TSerializer(new TBinaryProtocol.Factory());
        byte[] thriftBytes;
        try {
            thriftBytes = serializer.serialize(cfDef);
        } catch (TException e) {
            throw new RuntimeException(e);
        }
        return Hex.bytesToHex(thriftBytes);
    }

    /** Rebuilds a CfDef from the hex string produced by cfdefToString. */
    protected static CfDef cfdefFromString(String st) {
        assert st != null;
        TDeserializer deserializer = new TDeserializer(new TBinaryProtocol.Factory());
        CfDef restored = new CfDef();
        try {
            byte[] thriftBytes = Hex.hexToBytes(st);
            deserializer.deserialize(restored, thriftBytes);
            return restored;
        } catch (TException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds the CfDef for the configured keyspace and column family by querying
     * system.schema_columnfamilies, then fills in the column metadata via getColumnMetadata.
     *
     * @param client thrift client used to run the CQL3 query
     * @return the populated CfDef, or null when the query returns no rows
     */
    protected CfDef getCfDef(Cassandra.Client client) throws InvalidRequestException, UnavailableException,
            TimedOutException, SchemaDisagreementException, TException, CharacterCodingException, NotFoundException,
            org.apache.cassandra.exceptions.InvalidRequestException, ConfigurationException {
        // get CF meta data
        // result columns, by index: 0 type, 1 comparator, 2 subcomparator, 3 default_validator,
        // 4 key_validator, 5 key_aliases, 6 key_alias
        String query = "SELECT type," + "       comparator," + "       subcomparator," + "       default_validator,"
                + "       key_validator," + "       key_aliases," + "       key_alias "
                + "FROM system.schema_columnfamilies " + "WHERE keyspace_name = '%s' "
                + "  AND columnfamily_name = '%s' ";

        CqlResult result = client.execute_cql3_query(
                ByteBufferUtil.bytes(String.format(query, keyspace, column_family)), Compression.NONE,
                ConsistencyLevel.ONE);

        if (result == null || result.rows == null || result.rows.isEmpty())
            return null;

        Iterator<CqlRow> iteraRow = result.rows.iterator();
        CfDef cfDef = new CfDef();
        cfDef.keyspace = keyspace;
        cfDef.name = column_family;
        boolean cql3Table = false;
        // only the first row is read
        if (iteraRow.hasNext()) {
            CqlRow cqlRow = iteraRow.next();

            cfDef.column_type = ByteBufferUtil.string(cqlRow.columns.get(0).value);
            cfDef.comparator_type = ByteBufferUtil.string(cqlRow.columns.get(1).value);
            // subcomparator is optional
            ByteBuffer subComparator = cqlRow.columns.get(2).value;
            if (subComparator != null)
                cfDef.subcomparator_type = ByteBufferUtil.string(subComparator);
            cfDef.default_validation_class = ByteBufferUtil.string(cqlRow.columns.get(3).value);
            cfDef.key_validation_class = ByteBufferUtil.string(cqlRow.columns.get(4).value);
            // NOTE(review): 'keys' is populated below but never read afterwards in this method;
            // only the cql3Table flag influences the result
            List<String> keys = null;
            if (cqlRow.columns.get(5).value != null) {
                String keyAliases = ByteBufferUtil.string(cqlRow.columns.get(5).value);
                keys = FBUtilities.fromJsonList(keyAliases);
                // classic thrift tables: empty key_aliases and no key_alias
                if (keys.size() == 0 && cqlRow.columns.get(6).value == null) {
                    CFDefinition cfDefinition = getCfDefinition(keyspace, column_family, client);
                    for (ColumnIdentifier column : cfDefinition.keys.keySet()) {
                        String key = column.toString();
                        String type = cfDefinition.keys.get(column).type.toString();
                        logger.debug("name: {}, type: {} ", key, type);
                        keys.add(key);
                    }
                } else
                    cql3Table = true;
            } else {
                // no key_aliases: fall back to the single key_alias column
                String keyAlias = ByteBufferUtil.string(cqlRow.columns.get(6).value);
                keys = new ArrayList<String>(1);
                keys.add(keyAlias);
            }
        }
        cfDef.column_metadata = getColumnMetadata(client, cql3Table);
        return cfDef;
    }

    /**
     * Returns the column definitions for the configured column family; supplied by subclasses.
     * getCfDef stores the result as the CfDef's column_metadata.
     *
     * @param client    thrift client available for schema queries
     * @param cql3Table the flag computed by getCfDef from the key alias columns
     */
    protected abstract List<ColumnDef> getColumnMetadata(Cassandra.Client client, boolean cql3Table)
            throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException,
            TException, CharacterCodingException, org.apache.cassandra.exceptions.InvalidRequestException,
            ConfigurationException, NotFoundException;

    /**
     * Reads the column definitions of the configured column family from system.schema_columns.
     * When that table has no rows: with cassandraStorage set an empty list is returned,
     * otherwise the definitions are derived from the thrift schema of the column family.
     */
    protected List<ColumnDef> getColumnMeta(Cassandra.Client client, boolean cassandraStorage)
            throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException,
            TException, CharacterCodingException, org.apache.cassandra.exceptions.InvalidRequestException,
            ConfigurationException, NotFoundException {
        String query = "SELECT column_name, " + "       validator, " + "       index_type "
                + "FROM system.schema_columns " + "WHERE keyspace_name = '%s' " + "  AND columnfamily_name = '%s'";

        String cql = String.format(query, keyspace, column_family);
        CqlResult result = client.execute_cql3_query(ByteBufferUtil.bytes(cql), Compression.NONE,
                ConsistencyLevel.ONE);

        List<CqlRow> rows = result.rows;
        List<ColumnDef> columnDefs = new ArrayList<ColumnDef>();
        boolean noRows = rows == null || rows.isEmpty();

        if (noRows && cassandraStorage)
            return columnDefs;

        if (noRows) {
            // check classic thrift tables
            CFDefinition cfDefinition = getCfDefinition(keyspace, column_family, client);
            for (ColumnIdentifier column : cfDefinition.metadata.keySet()) {
                String columnName = column.toString();
                String type = cfDefinition.metadata.get(column).type.toString();
                logger.debug("name: {}, type: {} ", columnName, type);

                ColumnDef thriftDef = new ColumnDef();
                thriftDef.name = ByteBufferUtil.bytes(columnName);
                thriftDef.validation_class = type;
                columnDefs.add(thriftDef);
            }

            // no regular columns: fall back to the value column when it is literally named "value"
            if (columnDefs.isEmpty()) {
                String value = cfDefinition.value != null ? cfDefinition.value.toString() : null;
                if ("value".equals(value)) {
                    ColumnDef valueDef = new ColumnDef();
                    valueDef.name = ByteBufferUtil.bytes(value);
                    valueDef.validation_class = cfDefinition.value.type.toString();
                    columnDefs.add(valueDef);
                }
            }
            return columnDefs;
        }

        // one definition per schema_columns row: name, validator, optional index type
        for (CqlRow row : rows) {
            ColumnDef cDef = new ColumnDef();
            cDef.setName(ByteBufferUtil.clone(row.getColumns().get(0).value));
            cDef.validation_class = ByteBufferUtil.string(row.getColumns().get(1).value);
            ByteBuffer indexType = row.getColumns().get(2).value;
            if (indexType != null)
                cDef.index_type = getIndexType(ByteBufferUtil.string(indexType));
            columnDefs.add(cDef);
        }
        return columnDefs;
    }

    /**
     * Returns column definitions (name and validation class) for the key columns of the
     * configured column family, read from system.schema_columnfamilies: first the partition
     * key columns, then the cluster key columns, then the compact value_alias column when set.
     *
     * @param client thrift client used to run the CQL3 query
     * @return the key column definitions, or null when the query returns no rows
     */
    protected List<ColumnDef> getKeysMeta(Cassandra.Client client) throws Exception {
        // result columns, by index: 0 key_aliases, 1 column_aliases, 2 key_validator, 3 comparator,
        // 4 keyspace_name, 5 value_alias, 6 default_validator, 7 key_alias
        String query = "SELECT key_aliases, " + "       column_aliases, " + "       key_validator, "
                + "       comparator, " + "       keyspace_name, " + "       value_alias, "
                + "       default_validator," + "       key_alias  " + "FROM system.schema_columnfamilies "
                + "WHERE keyspace_name = '%s'" + "  AND columnfamily_name = '%s' ";

        CqlResult result = client.execute_cql3_query(
                ByteBufferUtil.bytes(String.format(query, keyspace, column_family)), Compression.NONE,
                ConsistencyLevel.ONE);

        if (result == null || result.rows == null || result.rows.isEmpty())
            return null;

        List<CqlRow> rows = result.rows;
        Iterator<CqlRow> iteraRow = rows.iterator();
        List<ColumnDef> keys = new ArrayList<ColumnDef>();
        // only the first row is read
        if (iteraRow.hasNext()) {
            CqlRow cqlRow = iteraRow.next();
            String name = ByteBufferUtil.string(cqlRow.columns.get(4).value);
            logger.debug("Found ksDef name: {}", name);
            String keyString;
            List<String> keyNames;
            Iterator<String> iterator;
            if (cqlRow.columns.get(0).getValue() == null) {
                // no key_aliases: use the single key_alias column as the only partition key
                ColumnDef cDef = new ColumnDef();
                cDef.name = ByteBuffer.wrap(result.rows.get(0).columns.get(7).getValue());
                keys.add(cDef);
            } else {
                keyString = ByteBufferUtil.string(ByteBuffer.wrap(cqlRow.columns.get(0).getValue()));

                logger.debug("partition keys: {}", keyString);
                keyNames = FBUtilities.fromJsonList(keyString);

                iterator = keyNames.iterator();
                while (iterator.hasNext()) {
                    ColumnDef cDef = new ColumnDef();
                    cDef.name = ByteBufferUtil.bytes(iterator.next());
                    keys.add(cDef);
                }
                // classic thrift tables: key_aliases is an empty list, so take the key and
                // column names from the thrift-derived CFDefinition instead
                if (keys.size() == 0) {
                    CFDefinition cfDefinition = getCfDefinition(keyspace, column_family, client);
                    for (ColumnIdentifier column : cfDefinition.keys.keySet()) {
                        String key = column.toString();
                        logger.debug("name: {} ", key);
                        ColumnDef cDef = new ColumnDef();
                        cDef.name = ByteBufferUtil.bytes(key);
                        keys.add(cDef);
                    }
                    for (ColumnIdentifier column : cfDefinition.columns.keySet()) {
                        String key = column.toString();
                        logger.debug("name: {} ", key);
                        ColumnDef cDef = new ColumnDef();
                        cDef.name = ByteBufferUtil.bytes(key);
                        keys.add(cDef);
                    }
                }

            }
            // cluster keys come from column_aliases and are appended after the partition keys
            // NOTE(review): a null column_aliases value would make ByteBuffer.wrap throw here -- confirm it is always set
            keyString = ByteBufferUtil.string(ByteBuffer.wrap(cqlRow.columns.get(1).getValue()));

            logger.debug("cluster keys: {}", keyString);
            keyNames = FBUtilities.fromJsonList(keyString);

            iterator = keyNames.iterator();
            while (iterator.hasNext()) {
                ColumnDef cDef = new ColumnDef();
                cDef.name = ByteBufferUtil.bytes(iterator.next());
                keys.add(cDef);
            }

            String validator = ByteBufferUtil.string(ByteBuffer.wrap(cqlRow.columns.get(2).getValue()));
            logger.debug("row key validator: {}", validator);
            AbstractType<?> keyValidator = parseType(validator);

            // walk the collected keys in order, assigning the key validator's component types
            // to the leading entries; keyItera is shared with the cluster-key pass below
            Iterator<ColumnDef> keyItera = keys.iterator();
            if (keyValidator instanceof CompositeType) {
                Iterator<AbstractType<?>> typeItera = ((CompositeType) keyValidator).types.iterator();
                // NOTE(review): assumes at least as many keys as composite component types
                while (typeItera.hasNext())
                    keyItera.next().validation_class = typeItera.next().toString();
            } else
                keyItera.next().validation_class = keyValidator.toString();

            validator = ByteBufferUtil.string(ByteBuffer.wrap(cqlRow.columns.get(3).getValue()));
            logger.debug("cluster key validator: {}", validator);

            // the remaining keys take their types from the comparator
            if (keyItera.hasNext() && validator != null && !validator.isEmpty()) {
                AbstractType<?> clusterKeyValidator = parseType(validator);

                if (clusterKeyValidator instanceof CompositeType) {
                    Iterator<AbstractType<?>> typeItera = ((CompositeType) clusterKeyValidator).types.iterator();
                    // NOTE(review): assumes the comparator has at least as many component types as remaining keys
                    while (keyItera.hasNext())
                        keyItera.next().validation_class = typeItera.next().toString();
                } else
                    keyItera.next().validation_class = clusterKeyValidator.toString();
            }

            // compact value_alias column
            if (cqlRow.columns.get(5).value != null) {
                try {
                    String compactValidator = ByteBufferUtil
                            .string(ByteBuffer.wrap(cqlRow.columns.get(6).getValue()));
                    logger.debug("default validator: {}", compactValidator);
                    AbstractType<?> defaultValidator = parseType(compactValidator);

                    ColumnDef cDef = new ColumnDef();
                    cDef.name = cqlRow.columns.get(5).value;
                    cDef.validation_class = defaultValidator.toString();
                    keys.add(cDef);
                } catch (Exception e) {
                    // no compact column at value_alias
                    // (any failure here is deliberately ignored and the column is simply not added)
                }
            }

        }
        return keys;
    }

    /**
     * Maps an index type name to the thrift IndexType, ignoring case.
     * The comparison is locale-independent, so it also works under default locales
     * (e.g. Turkish) where lower-casing "COMPOSITES" would not yield "composites".
     *
     * @param type the index type name; may be null
     * @return KEYS, CUSTOM or COMPOSITES, or null when the name is null or not recognized
     */
    protected IndexType getIndexType(String type) {
        if ("keys".equalsIgnoreCase(type))
            return IndexType.KEYS;
        else if ("custom".equalsIgnoreCase(type))
            return IndexType.CUSTOM;
        else if ("composites".equalsIgnoreCase(type))
            return IndexType.COMPOSITES;
        else
            return null;
    }

    /**
     * Returns the names of the indexed columns as partition keys, or null when the
     * partition filter is disabled.
     * Names are decoded as UTF-8 rather than with the platform default charset, matching
     * the UTF-8 encoding ByteBufferUtil.bytes applies when this class writes column names.
     *
     * @param location unused
     * @param job      unused
     * @return the indexed column names, or null when usePartitionFilter is false
     */
    public String[] getPartitionKeys(String location, Job job) {
        if (!usePartitionFilter)
            return null;
        java.nio.charset.Charset utf8 = java.nio.charset.Charset.forName("UTF-8");
        List<ColumnDef> indexes = getIndexes();
        String[] partitionKeys = new String[indexes.size()];
        for (int i = 0; i < indexes.size(); i++) {
            partitionKeys[i] = new String(indexes.get(i).getName(), utf8);
        }
        return partitionKeys;
    }

    /** Returns the columns of the load-side column family definition that have an index type set. */
    protected List<ColumnDef> getIndexes() {
        List<ColumnDef> indexed = new ArrayList<ColumnDef>();
        for (ColumnDef column : getCfDef(loadSignature).column_metadata) {
            if (column.index_type == null)
                continue;
            indexed.add(column);
        }
        return indexed;
    }

    /**
     * Builds the CFDefinition of a column family from the thrift description of its keyspace.
     * The column family name is matched case-insensitively.
     *
     * @param ks     keyspace name
     * @param cf     column family name
     * @param client thrift client used to describe the keyspace
     * @return the CFDefinition of the first matching column family; never null
     * @throws NotFoundException when the keyspace has no column family with that name
     *         (every caller dereferences the result immediately, so a null return would
     *         only surface as an unexplained NullPointerException)
     */
    private CFDefinition getCfDefinition(String ks, String cf, Cassandra.Client client)
            throws NotFoundException, InvalidRequestException, TException,
            org.apache.cassandra.exceptions.InvalidRequestException, ConfigurationException {
        KsDef ksDef = client.describe_keyspace(ks);
        for (CfDef cfDef : ksDef.cf_defs) {
            if (cfDef.name.equalsIgnoreCase(cf))
                return new CFDefinition(CFMetaData.fromThrift(cfDef));
        }
        // NotFoundException carries no message, so log which column family was missing
        logger.error("Column family '{}' not found in keyspace '{}'", cf, ks);
        throw new NotFoundException();
    }
}