Example usage for org.apache.cassandra.hadoop ConfigHelper getInputColumnFamily

Introduction

In this page you can find the example usage for org.apache.cassandra.hadoop ConfigHelper getInputColumnFamily.

Prototype

public static String getInputColumnFamily(Configuration conf)

Source Link

Usage

From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java

License:Apache License

private static void validateConfiguration(Configuration conf) {
    if (ConfigHelper.getInputKeyspace(conf) == null || ConfigHelper.getInputColumnFamily(conf) == null) {
        throw new UnsupportedOperationException(
                "you must set the keyspace and columnfamily with setColumnFamily()");
    }// w  w w .  java2 s . c  o m
    if (ConfigHelper.getInputSlicePredicate(conf) == null) {
        throw new UnsupportedOperationException("you must set the predicate with setPredicate");
    }
    if (ConfigHelper.getInputInitialAddress(conf) == null)
        throw new UnsupportedOperationException("You must set the initial output address to a Cassandra node");
    if (ConfigHelper.getInputPartitioner(conf) == null)
        throw new UnsupportedOperationException("You must set the Cassandra partitioner class");
}

From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();

    validateConfiguration(conf);//  w  w w . j a  va 2  s.  co  m

    // cannonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);

    keyspace = ConfigHelper.getInputKeyspace(context.getConfiguration());
    cfName = ConfigHelper.getInputColumnFamily(context.getConfiguration());
    partitioner = ConfigHelper.getInputPartitioner(context.getConfiguration());
    logger.debug("partitioner is " + partitioner);

    // cannonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = Executors.newCachedThreadPool();
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        List<Future<List<InputSplit>>> splitfutures = new ArrayList<Future<List<InputSplit>>>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null && jobKeyRange.start_token != null) {
            assert partitioner
                    .preservesOrder() : "ConfigHelper.setInputKeyRange(..) can only be used with a order preserving paritioner";
            assert jobKeyRange.start_key == null : "only start_token supported";
            assert jobKeyRange.end_key == null : "only end_token supported";
            jobRange = new Range<Token>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                    partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner);
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                // for each range, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, conf)));
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        range.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        range.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(range, conf)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<InputSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    assert splits.size() > 0;
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}

From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyRecordReader.java

License:Apache License

public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = context.getConfiguration();
    KeyRange jobRange = ConfigHelper.getInputKeyRange(conf);
    filter = jobRange == null ? null : jobRange.row_filter;
    predicate = ConfigHelper.getInputSlicePredicate(conf);
    boolean widerows = ConfigHelper.getInputIsWide(conf);
    isEmptyPredicate = isEmptyPredicate(predicate);
    totalRowCount = ConfigHelper.getInputSplitSize(conf);
    batchSize = ConfigHelper.getRangeBatchSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getReadConsistencyLevel(conf));

    keyspace = ConfigHelper.getInputKeyspace(conf);

    try {//from  w w w. j  a  va 2  s.  co  m
        // only need to connect once
        if (socket != null && socket.isOpen())
            return;

        // create connection using thrift
        String location = getLocation();
        socket = new TSocket(location, ConfigHelper.getInputRpcPort(conf));
        TTransport transport = ConfigHelper.getInputTransportFactory(conf).openTransport(socket);
        TBinaryProtocol binaryProtocol = new TBinaryProtocol(transport);
        client = new Cassandra.Client(binaryProtocol);

        // log in
        client.set_keyspace(keyspace);
        if (ConfigHelper.getInputKeyspaceUserName(conf) != null) {
            Map<String, String> creds = new HashMap<String, String>();
            creds.put(IAuthenticator.USERNAME_KEY, ConfigHelper.getInputKeyspaceUserName(conf));
            creds.put(IAuthenticator.PASSWORD_KEY, ConfigHelper.getInputKeyspacePassword(conf));
            AuthenticationRequest authRequest = new AuthenticationRequest(creds);
            client.login(authRequest);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    iter = widerows ? new WideRowIterator() : new StaticRowIterator();
    logger.debug("created {}", iter);
}

From source file:grakn.core.server.session.reader.GraknCqlBridgeRecordReader.java

License:Open Source License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE) ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try {// w w w . j  ava 2  s  .  c o  m
        if (cluster != null) {
            return;
        }
        // create a Cluster instance
        String[] locations = split.getLocations();
        // Previous implementation of this class was instantiating a new Clutser with the following comment:
        // "disregard the conf as it brings some unforeseen issues."
        // Cluster.builder().addContactPoints(locations).build();

        // The above ignores the config so it's not possible to use it when we need to change default ports
        // as they won't be correctly propagated. So now we create Cluster using conf.
        // If this keeps breaking we might need to investigate further.
        cluster = CqlConfigHelper.getInputCluster(ConfigHelper.getInputInitialAddress(conf).split(","), conf);
    } catch (Exception e) {
        throw new RuntimeException(
                "Unable to create cluster for table: " + cfName + ", in keyspace: " + keyspace, e);
    }
    // cluster should be represent to a valid cluster now
    session = cluster.connect(quote(keyspace));
    Preconditions.checkState(session != null, "Can't create connection session");
    //get negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion().toInt();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = CqlConfigHelper.getInputCql(conf);
    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery)
            && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    log.trace("cqlQuery {}", cqlQuery);
    distinctKeyIterator = new DistinctKeyIterator();
    log.trace("created {}", distinctKeyIterator);
}

From source file:org.apache.hadoop.hive.cassandra.input.ColumnFamilyWideRowRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = context.getConfiguration();
    predicate = ConfigHelper.getInputSlicePredicate(conf);
    if (!isSliceRangePredicate(predicate)) {
        throw new AssertionError("WideRowsRequire a slice range");
    }/* w  w  w  .ja  v a2  s .  c  om*/

    totalRowCount = ConfigHelper.getInputSplitSize(conf);
    Log.info("total rows = " + totalRowCount);
    batchRowCount = 1;
    rowPageSize = predicate.getSlice_range().getCount();
    startSlicePredicate = predicate.getSlice_range().start;
    cfName = ConfigHelper.getInputColumnFamily(conf);
    consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getReadConsistencyLevel(conf));

    keyspace = ConfigHelper.getInputKeyspace(conf);

    try {
        // only need to connect once
        if (socket != null && socket.isOpen()) {
            return;
        }

        // create connection using thrift
        String location = getLocation();
        socket = new TSocket(location, ConfigHelper.getInputRpcPort(conf));
        TBinaryProtocol binaryProtocol = new TBinaryProtocol(new TFramedTransport(socket));
        client = new Cassandra.Client(binaryProtocol);
        socket.open();

        // log in
        client.set_keyspace(keyspace);
        if (ConfigHelper.getInputKeyspaceUserName(conf) != null) {
            Map<String, String> creds = new HashMap<String, String>();
            creds.put(IAuthenticator.USERNAME_KEY, ConfigHelper.getInputKeyspaceUserName(conf));
            creds.put(IAuthenticator.PASSWORD_KEY, ConfigHelper.getInputKeyspacePassword(conf));
            AuthenticationRequest authRequest = new AuthenticationRequest(creds);
            client.login(authRequest);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    iter = new WideRowIterator();
}

From source file:org.janusgraph.hadoop.formats.cassandra.CqlBridgeRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE) ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try {// w w  w.j ava2s.c o m
        if (cluster != null) {
            return;
        }
        // create a Cluster instance
        String[] locations = split.getLocations();
        //            cluster = CqlConfigHelper.getInputCluster(locations, conf);
        // disregard the conf as it brings some unforeseen issues.
        cluster = Cluster.builder().addContactPoints(locations).build();
    } catch (Exception e) {
        throw new RuntimeException(
                "Unable to create cluster for table: " + cfName + ", in keyspace: " + keyspace, e);
    }
    // cluster should be represent to a valid cluster now
    session = cluster.connect(quote(keyspace));
    Preconditions.checkState(session != null, "Can't create connection session");
    //get negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion().toInt();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = CqlConfigHelper.getInputCql(conf);
    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery)
            && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    log.trace("cqlQuery {}", cqlQuery);
    distinctKeyIterator = new DistinctKeyIterator();
    log.trace("created {}", distinctKeyIterator);
}