Example usage for org.apache.cassandra.hadoop ConfigHelper getInputPartitioner

Introduction

On this page you can find example usages for org.apache.cassandra.hadoop ConfigHelper getInputPartitioner.

Prototype

public static IPartitioner getInputPartitioner(Configuration conf) 
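
A minimal sketch of how the getter pairs with its setter, assuming Murmur3Partitioner purely for illustration (the partitioner class must match the one the target cluster actually runs):

import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.hadoop.conf.Configuration;

Configuration conf = new Configuration();
// store the partitioner class name in the job configuration;
// Murmur3Partitioner is an illustrative assumption, not a required value
ConfigHelper.setInputPartitioner(conf, "org.apache.cassandra.dht.Murmur3Partitioner");

// instantiate the configured partitioner; the examples below compare the
// result against null to detect a missing setting
IPartitioner partitioner = ConfigHelper.getInputPartitioner(conf);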

Usage

From source file: com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java

License: Apache License

private static void validateConfiguration(Configuration conf) {
    if (ConfigHelper.getInputKeyspace(conf) == null || ConfigHelper.getInputColumnFamily(conf) == null) {
        throw new UnsupportedOperationException(
                "you must set the keyspace and columnfamily with setColumnFamily()");
    }
    if (ConfigHelper.getInputSlicePredicate(conf) == null) {
        throw new UnsupportedOperationException("you must set the predicate with setPredicate");
    }
    if (ConfigHelper.getInputInitialAddress(conf) == null)
        throw new UnsupportedOperationException("You must set the initial output address to a Cassandra node");
    if (ConfigHelper.getInputPartitioner(conf) == null)
        throw new UnsupportedOperationException("You must set the Cassandra partitioner class");
}
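
For reference, a minimal sketch of the setters that satisfy this validation. The keyspace, column family, and node address are hypothetical placeholders, and Murmur3Partitioner is assumed only for illustration:

import java.nio.ByteBuffer;

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.hadoop.conf.Configuration;

Configuration conf = new Configuration();
ConfigHelper.setInputColumnFamily(conf, "my_keyspace", "my_column_family"); // hypothetical names
// an empty slice range selects every column
SlicePredicate predicate = new SlicePredicate().setSlice_range(
        new SliceRange(ByteBuffer.allocate(0), ByteBuffer.allocate(0), false, Integer.MAX_VALUE));
ConfigHelper.setInputSlicePredicate(conf, predicate);
ConfigHelper.setInputInitialAddress(conf, "127.0.0.1"); // any reachable Cassandra node
ConfigHelper.setInputPartitioner(conf, "org.apache.cassandra.dht.Murmur3Partitioner");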

From source file: com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java

License: Apache License

public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();

    validateConfiguration(conf);

    // canonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);

    keyspace = ConfigHelper.getInputKeyspace(context.getConfiguration());
    cfName = ConfigHelper.getInputColumnFamily(context.getConfiguration());
    partitioner = ConfigHelper.getInputPartitioner(context.getConfiguration());
    logger.debug("partitioner is " + partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = Executors.newCachedThreadPool();
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        List<Future<List<InputSplit>>> splitfutures = new ArrayList<Future<List<InputSplit>>>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null && jobKeyRange.start_token != null) {
            assert partitioner
                    .preservesOrder() : "ConfigHelper.setInputKeyRange(..) can only be used with an order-preserving partitioner";
            assert jobKeyRange.start_key == null : "only start_token supported";
            assert jobKeyRange.end_key == null : "only end_token supported";
            jobRange = new Range<Token>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                    partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner);
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                // for each range, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, conf)));
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        range.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        range.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(range, conf)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<InputSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    assert splits.size() > 0;
    // shuffle the splits so consecutive map tasks do not all hit the same replica nodes
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}
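
The jobRange branch above only runs when a token range has been configured for the job. A minimal sketch of setting one, assuming an order-preserving partitioner as the assertion requires (the token values here are hypothetical):

Configuration conf = new Configuration();
// restrict the job to a token sub-range; per the assertion in getSplits,
// this is only valid with an order-preserving partitioner
ConfigHelper.setInputRange(conf, "0000", "ffff");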

From source file: com.dse.pig.udfs.CqlStorage.java

License: Apache License

/** set read configuration settings */
public void setLocation(String location, Job job) throws IOException {
    conf = job.getConfiguration();
    setLocationFromUri(location);

    if (username != null && password != null)
        ConfigHelper.setInputKeyspaceUserNameAndPassword(conf, username, password);
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass != null)
        ConfigHelper.setInputPartitioner(conf, partitionerClass);
    if (rpcPort != null)
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    if (initHostAddress != null)
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);

    ConfigHelper.setInputColumnFamily(conf, keyspace, column_family);
    setConnectionInformation();

    CqlConfigHelper.setInputCQLPageRowSize(conf, String.valueOf(pageSize));
    if (columns != null && !columns.trim().isEmpty())
        CqlConfigHelper.setInputColumns(conf, columns);

    String whereClauseForPartitionFilter = getWhereClauseForPartitionFilter();
    String wc = whereClause != null && !whereClause.trim().isEmpty()
            ? whereClauseForPartitionFilter == null ? whereClause
                    : String.format("%s AND %s", whereClause.trim(), whereClauseForPartitionFilter)
            : whereClauseForPartitionFilter;

    if (wc != null) {
        logger.debug("where clause: {}", wc);
        CqlConfigHelper.setInputWhereClauses(conf, wc);
    }

    if (System.getenv(PIG_INPUT_SPLIT_SIZE) != null) {
        try {
            ConfigHelper.setInputSplitSize(conf, Integer.valueOf(System.getenv(PIG_INPUT_SPLIT_SIZE)));
        } catch (NumberFormatException e) {
            throw new IOException("PIG_INPUT_SPLIT_SIZE is not a number", e);
        }
    }

    if (ConfigHelper.getInputRpcPort(conf) == 0)
        throw new IOException("PIG_INPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    if (ConfigHelper.getInputInitialAddress(conf) == null)
        throw new IOException("PIG_INPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getInputPartitioner(conf) == null)
        throw new IOException("PIG_INPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");
    if (loadSignature == null)
        loadSignature = location;

    initSchema(loadSignature);
}

From source file: grakn.core.server.session.reader.GraknCqlBridgeRecordReader.java

License: Open Source License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE) ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try {
        if (cluster != null) {
            return;
        }
        // create a Cluster instance
        String[] locations = split.getLocations();
        // A previous implementation of this class instantiated a new Cluster with the following comment:
        // "disregard the conf as it brings some unforeseen issues."
        // Cluster.builder().addContactPoints(locations).build();

        // The above ignores the config, so non-default ports are not propagated
        // correctly. We therefore create the Cluster from conf; if this keeps
        // breaking we may need to investigate further.
        cluster = CqlConfigHelper.getInputCluster(ConfigHelper.getInputInitialAddress(conf).split(","), conf);
    } catch (Exception e) {
        throw new RuntimeException(
                "Unable to create cluster for table: " + cfName + ", in keyspace: " + keyspace, e);
    }
    // cluster should now reference a valid cluster
    session = cluster.connect(quote(keyspace));
    Preconditions.checkState(session != null, "Can't create connection session");
    //get negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion().toInt();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = CqlConfigHelper.getInputCql(conf);
    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery)
            && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    log.trace("cqlQuery {}", cqlQuery);
    distinctKeyIterator = new DistinctKeyIterator();
    log.trace("created {}", distinctKeyIterator);
}

From source file: org.janusgraph.hadoop.formats.cassandra.CqlBridgeRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE) ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try {
        if (cluster != null) {
            return;
        }
        // create a Cluster instance
        String[] locations = split.getLocations();
        //            cluster = CqlConfigHelper.getInputCluster(locations, conf);
        // disregard the conf as it brings some unforeseen issues.
        cluster = Cluster.builder().addContactPoints(locations).build();
    } catch (Exception e) {
        throw new RuntimeException(
                "Unable to create cluster for table: " + cfName + ", in keyspace: " + keyspace, e);
    }
    // cluster should now reference a valid cluster
    session = cluster.connect(quote(keyspace));
    Preconditions.checkState(session != null, "Can't create connection session");
    //get negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion().toInt();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = CqlConfigHelper.getInputCql(conf);
    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery)
            && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    log.trace("cqlQuery {}", cqlQuery);
    distinctKeyIterator = new DistinctKeyIterator();
    log.trace("created {}", distinctKeyIterator);
}