Example usage for org.apache.cassandra.hadoop ConfigHelper setInputColumnFamily

Introduction

This page collects usage examples for org.apache.cassandra.hadoop ConfigHelper setInputColumnFamily, drawn from open-source projects.

Prototype

public static void setInputColumnFamily(Configuration conf, String keyspace, String columnFamily,
        boolean widerows)


Document

Set the keyspace and column family for the input of this job.
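
Before the per-project examples below, here is a minimal, self-contained sketch of a typical call site. The host, port, keyspace, and column family values are hypothetical placeholders; the surrounding ConfigHelper calls mirror the input settings the examples below configure alongside setInputColumnFamily.

import java.nio.ByteBuffer;

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.hadoop.conf.Configuration;

public class InputSetupSketch {
    public static void configure(Configuration conf) {
        // Hypothetical keyspace and column family; widerows=true pages wide rows
        // in column batches instead of materializing each row at once.
        ConfigHelper.setInputColumnFamily(conf, "my_keyspace", "my_cf", true);

        // The input format also needs a contact point, RPC port, and partitioner.
        ConfigHelper.setInputInitialAddress(conf, "127.0.0.1");
        ConfigHelper.setInputRpcPort(conf, "9160");
        ConfigHelper.setInputPartitioner(conf, "org.apache.cassandra.dht.Murmur3Partitioner");

        // Slice the whole row: empty start/finish bounds, unreversed.
        SliceRange range = new SliceRange(ByteBuffer.allocate(0), ByteBuffer.allocate(0), false, Integer.MAX_VALUE);
        SlicePredicate predicate = new SlicePredicate();
        predicate.setSlice_range(range);
        ConfigHelper.setInputSlicePredicate(conf, predicate);
    }
}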

Usage

From source file: grakn.core.server.session.reader.GraknBinaryInputFormat.java

License: Open Source License

@Override
public void setConf(final Configuration config) {
    super.setConf(config);

    // Copy some JanusGraph configuration keys to the Hadoop Configuration keys used by Cassandra's ColumnFamilyInputFormat
    ConfigHelper.setInputInitialAddress(config,
            janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
    if (janusgraphConf.has(GraphDatabaseConfiguration.STORAGE_PORT)) {
        ConfigHelper.setInputRpcPort(config,
                String.valueOf(janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
    }
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_USERNAME)) {
        ConfigHelper.setInputKeyspaceUserName(config,
                janusgraphConf.get(GraphDatabaseConfiguration.AUTH_USERNAME));
    }
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD)) {
        ConfigHelper.setInputKeyspacePassword(config,
                janusgraphConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD));
    }
    // Copy keyspace, force the CF setting to edgestore, honor widerows when set
    final boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    // Use the setInputColumnFamily overload that includes a widerows argument; using the overload without this argument forces it false
    ConfigHelper.setInputColumnFamily(config,
            janusgraphConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE),
            mrConf.get(JanusGraphHadoopConfiguration.COLUMN_FAMILY_NAME), wideRows);
    log.debug("Set keyspace: {}", janusgraphConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE));

    // Set the column slice bounds via Faunus's vertex query filter
    final SlicePredicate predicate = new SlicePredicate();
    final int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE);
    predicate.setSlice_range(getSliceRange(rangeBatchSize)); // TODO stop slicing the whole row
    ConfigHelper.setInputSlicePredicate(config, predicate);
}
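
Note the four-argument overload here: as the inline comment explains, the three-argument setInputColumnFamily(conf, keyspace, columnFamily) overload forces widerows to false, so honoring a job-level flag requires passing it explicitly. A short sketch of the difference, where keyspace and columnFamily stand in for the values resolved in the example above:

    // Three-argument overload: widerows is always false.
    ConfigHelper.setInputColumnFamily(conf, keyspace, columnFamily);

    // Four-argument overload: widerows follows the job configuration.
    boolean wideRows = conf.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    ConfigHelper.setInputColumnFamily(conf, keyspace, columnFamily, wideRows);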

From source file: org.apache.hadoop.hive.cassandra.input.cql.HiveCqlInputFormat.java

License: Apache License

@Override
public RecordReader<MapWritableComparable, MapWritable> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {
    HiveCassandraStandardSplit cassandraSplit = (HiveCassandraStandardSplit) split;

    List<String> columns = CqlSerDe.parseColumnMapping(cassandraSplit.getColumnMapping());

    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (columns.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    ColumnFamilySplit cfSplit = cassandraSplit.getSplit();
    Job job = new Job(jobConf);

    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    SlicePredicate predicate = new SlicePredicate();

    predicate.setColumn_names(getColumnNames(columns, readColIDs));

    try {

        boolean wideRows = true;

        ConfigHelper.setInputColumnFamily(tac.getConfiguration(), cassandraSplit.getKeyspace(),
                cassandraSplit.getColumnFamily(), wideRows);

        ConfigHelper.setInputSlicePredicate(tac.getConfiguration(), predicate);
        ConfigHelper.setRangeBatchSize(tac.getConfiguration(), cassandraSplit.getRangeBatchSize());
        ConfigHelper.setInputRpcPort(tac.getConfiguration(), cassandraSplit.getPort() + "");
        ConfigHelper.setInputInitialAddress(tac.getConfiguration(), cassandraSplit.getHost());
        ConfigHelper.setInputPartitioner(tac.getConfiguration(), cassandraSplit.getPartitioner());
        // Set Split Size
        ConfigHelper.setInputSplitSize(tac.getConfiguration(), cassandraSplit.getSplitSize());

        LOG.info("Validators : " + tac.getConfiguration().get(CassandraColumnSerDe.CASSANDRA_VALIDATOR_TYPE));
        List<IndexExpression> indexExpr = parseFilterPredicate(jobConf);
        if (indexExpr != null) {
            //We have pushed down a filter from the Hive query, we can use this against secondary indexes
            ConfigHelper.setInputRange(tac.getConfiguration(), indexExpr);
        }

        CqlHiveRecordReader rr = new CqlHiveRecordReader(new CqlPagingRecordReader());

        rr.initialize(cfSplit, tac);

        return rr;

    } catch (Exception ie) {
        throw new IOException(ie);
    }
}
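
Unlike the slice-range predicates in the JanusGraph-style examples, this format restricts the read to explicitly named columns. A minimal sketch of that pairing, with hypothetical keyspace, table, and column names:

import java.util.Arrays;

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.utils.ByteBufferUtil;

    // Read only two named columns from each row.
    SlicePredicate predicate = new SlicePredicate();
    predicate.setColumn_names(Arrays.asList(
            ByteBufferUtil.bytes("first_name"),
            ByteBufferUtil.bytes("last_name")));

    // Hypothetical keyspace/table; pair the predicate with the input CF.
    ConfigHelper.setInputColumnFamily(conf, "my_keyspace", "users", true);
    ConfigHelper.setInputSlicePredicate(conf, predicate);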

From source file: org.apache.hadoop.hive.cassandra.input.HiveCassandraStandardColumnInputFormat.java

License: Apache License

@Override
public RecordReader<BytesWritable, MapWritable> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {
    HiveCassandraStandardSplit cassandraSplit = (HiveCassandraStandardSplit) split;

    List<String> columns = CassandraColumnSerDe.parseColumnMapping(cassandraSplit.getColumnMapping());
    isTransposed = CassandraColumnSerDe.isTransposed(columns);

    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (columns.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    org.apache.cassandra.hadoop.ColumnFamilySplit cfSplit = cassandraSplit.getSplit();
    Job job = new Job(jobConf);

    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    SlicePredicate predicate = new SlicePredicate();

    if (isTransposed || readColIDs.size() == columns.size() || readColIDs.size() == 0) {
        SliceRange range = new SliceRange();
        AbstractType comparator = BytesType.instance;

        String comparatorType = jobConf.get(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR);
        if (comparatorType != null && !comparatorType.equals("")) {
            try {
                comparator = TypeParser.parse(comparatorType);
            } catch (ConfigurationException ex) {
                throw new IOException("Comparator class not found.");
            } catch (SyntaxException e) {
                throw new IOException(e);
            }
        }

        String sliceStart = jobConf.get(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START);
        String sliceEnd = jobConf.get(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH);
        String reversed = jobConf.get(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED);

        range.setStart(comparator.fromString(sliceStart == null ? "" : sliceStart));
        range.setFinish(comparator.fromString(sliceEnd == null ? "" : sliceEnd));
        range.setReversed(reversed == null ? false : reversed.equals("true"));
        range.setCount(cassandraSplit.getSlicePredicateSize());
        predicate.setSlice_range(range);
    } else {
        int iKey = columns.indexOf(CassandraColumnSerDe.CASSANDRA_KEY_COLUMN);
        predicate.setColumn_names(getColumnNames(iKey, columns, readColIDs));
    }

    try {

        boolean wideRows = false;
        if (isTransposed && tac.getConfiguration()
                .getBoolean(CassandraColumnSerDe.CASSANDRA_ENABLE_WIDEROW_ITERATOR, true)) {
            wideRows = true;
        }

        ConfigHelper.setInputColumnFamily(tac.getConfiguration(), cassandraSplit.getKeyspace(),
                cassandraSplit.getColumnFamily(), wideRows);

        ConfigHelper.setInputSlicePredicate(tac.getConfiguration(), predicate);
        ConfigHelper.setRangeBatchSize(tac.getConfiguration(), cassandraSplit.getRangeBatchSize());
        ConfigHelper.setInputRpcPort(tac.getConfiguration(), cassandraSplit.getPort() + "");
        ConfigHelper.setInputInitialAddress(tac.getConfiguration(), cassandraSplit.getHost());
        ConfigHelper.setInputPartitioner(tac.getConfiguration(), cassandraSplit.getPartitioner());
        // Set Split Size
        ConfigHelper.setInputSplitSize(tac.getConfiguration(), cassandraSplit.getSplitSize());

        LOG.info("Validators : " + tac.getConfiguration().get(CassandraColumnSerDe.CASSANDRA_VALIDATOR_TYPE));
        List<IndexExpression> indexExpr = parseFilterPredicate(jobConf);
        if (indexExpr != null) {
            //We have pushed down a filter from the Hive query, we can use this against secondary indexes
            ConfigHelper.setInputRange(tac.getConfiguration(), indexExpr);
        }

        CassandraHiveRecordReader rr = new CassandraHiveRecordReader(new ColumnFamilyRecordReader(),
                isTransposed);

        rr.initialize(cfSplit, tac);

        return rr;

    } catch (Exception ie) {
        throw new IOException(ie);
    }
}
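
Both Hive input formats forward pushed-down filter predicates to Cassandra through ConfigHelper.setInputRange, which targets secondary indexes. A hedged sketch of building such a list by hand; the indexed column and value are hypothetical:

import java.util.Collections;
import java.util.List;

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.thrift.IndexExpression;
import org.apache.cassandra.thrift.IndexOperator;
import org.apache.cassandra.utils.ByteBufferUtil;

    // Hypothetical equality filter on an indexed column named "status".
    List<IndexExpression> indexExpr = Collections.singletonList(
            new IndexExpression(ByteBufferUtil.bytes("status"),
                    IndexOperator.EQ,
                    ByteBufferUtil.bytes("active")));
    ConfigHelper.setInputRange(conf, indexExpr);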

From source file: org.janusgraph.hadoop.formats.cassandra.CassandraBinaryInputFormat.java

License: Apache License

@Override
public void setConf(final Configuration config) {
    super.setConf(config);

    // Copy some JanusGraph configuration keys to the Hadoop Configuration keys used by Cassandra's ColumnFamilyInputFormat
    ConfigHelper.setInputInitialAddress(config,
            janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
    if (janusgraphConf.has(GraphDatabaseConfiguration.STORAGE_PORT))
        ConfigHelper.setInputRpcPort(config,
                String.valueOf(janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_USERNAME))
        ConfigHelper.setInputKeyspaceUserName(config,
                janusgraphConf.get(GraphDatabaseConfiguration.AUTH_USERNAME));
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD))
        ConfigHelper.setInputKeyspacePassword(config,
                janusgraphConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD));

    // Copy keyspace, force the CF setting to edgestore, honor widerows when set
    final boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    // Use the setInputColumnFamily overload that includes a widerows argument; using the overload without this argument forces it false
    ConfigHelper.setInputColumnFamily(config,
            janusgraphConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE),
            mrConf.get(JanusGraphHadoopConfiguration.COLUMN_FAMILY_NAME), wideRows);
    log.debug("Set keyspace: {}", janusgraphConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE));

    // Set the column slice bounds via Faunus's vertex query filter
    final SlicePredicate predicate = new SlicePredicate();
    final int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE);
    predicate.setSlice_range(getSliceRange(JanusGraphHadoopSetupCommon.DEFAULT_SLICE_QUERY, rangeBatchSize)); // TODO stop slicing the whole row
    ConfigHelper.setInputSlicePredicate(config, predicate);
}

From source file: org.janusgraph.hadoop.formats.cql.CqlBinaryInputFormat.java

License: Apache License

@Override
public void setConf(final Configuration config) {
    super.setConf(config);

    // Copy some JanusGraph configuration keys to the Hadoop Configuration keys used by Cassandra's ColumnFamilyInputFormat
    ConfigHelper.setInputInitialAddress(config,
            janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
    if (janusgraphConf.has(GraphDatabaseConfiguration.STORAGE_PORT))
        ConfigHelper.setInputRpcPort(config,
                String.valueOf(janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_USERNAME))
        ConfigHelper.setInputKeyspaceUserName(config,
                janusgraphConf.get(GraphDatabaseConfiguration.AUTH_USERNAME));
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD))
        ConfigHelper.setInputKeyspacePassword(config,
                janusgraphConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD));

    // Copy keyspace, force the CF setting to edgestore, honor widerows when set
    final boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    // Use the setInputColumnFamily overload that includes a widerows argument; using the overload without this argument forces it false
    ConfigHelper.setInputColumnFamily(config, janusgraphConf.get(CQLConfigOptions.KEYSPACE),
            mrConf.get(JanusGraphHadoopConfiguration.COLUMN_FAMILY_NAME), wideRows);
    log.debug("Set keyspace: {}", janusgraphConf.get(CQLConfigOptions.KEYSPACE));

    // Set the column slice bounds via Faunus's vertex query filter
    final SlicePredicate predicate = new SlicePredicate();
    final int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE);
    predicate.setSlice_range(getSliceRange(rangeBatchSize)); // TODO stop slicing the whole row
    ConfigHelper.setInputSlicePredicate(config, predicate);
}