List of usage examples for org.apache.cassandra.hadoop.ConfigHelper#getInputSplitSize
public static int getInputSplitSize(Configuration conf)
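Before the per-project examples, a minimal sketch of how the value returned by getInputSplitSize is typically configured on the job side. It assumes the companion setter ConfigHelper.setInputSplitSize(Configuration, int) from the same class; the 128 * 1024 row target is an arbitrary illustrative value, and the library default (64k rows in the Thrift-era ConfigHelper) applies when nothing is set.

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.hadoop.conf.Configuration;

public class SplitSizeConfigExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Target number of rows per input split; this drives how many
        // map tasks the input format creates for the table scan.
        ConfigHelper.setInputSplitSize(conf, 128 * 1024); // illustrative value

        // Record readers such as the ones below call this getter to size
        // their row iteration; it falls back to the library default when
        // no value has been set on the Configuration.
        int splitSize = ConfigHelper.getInputSplitSize(conf);
        System.out.println("rows per split: " + splitSize);
    }
}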
From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java
License:Apache License
private List<String> getSubSplits(String keyspace, String cfName, TokenRange range, Configuration conf)
        throws IOException {
    int splitsize = ConfigHelper.getInputSplitSize(conf);
    for (int i = 0; i < range.rpc_endpoints.size(); i++) {
        String host = range.rpc_endpoints.get(i);

        if (host == null || host.equals("0.0.0.0"))
            host = range.endpoints.get(i);

        try {
            Cassandra.Client client = ConfigHelper.createConnection(conf, host,
                    ConfigHelper.getInputRpcPort(conf));
            client.set_keyspace(keyspace);
            return client.describe_splits(cfName, range.start_token, range.end_token, splitsize);
        } catch (IOException e) {
            logger.debug("failed connect to endpoint " + host, e);
        } catch (TException e) {
            throw new RuntimeException(e);
        } catch (InvalidRequestException e) {
            throw new RuntimeException(e);
        }
    }
    throw new IOException("failed connecting to all endpoints " + StringUtils.join(range.endpoints, ","));
}
From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyRecordReader.java
License:Apache License
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = context.getConfiguration();
    KeyRange jobRange = ConfigHelper.getInputKeyRange(conf);
    filter = jobRange == null ? null : jobRange.row_filter;
    predicate = ConfigHelper.getInputSlicePredicate(conf);
    boolean widerows = ConfigHelper.getInputIsWide(conf);
    isEmptyPredicate = isEmptyPredicate(predicate);
    totalRowCount = ConfigHelper.getInputSplitSize(conf);
    batchSize = ConfigHelper.getRangeBatchSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getReadConsistencyLevel(conf));
    keyspace = ConfigHelper.getInputKeyspace(conf);

    try {
        // only need to connect once
        if (socket != null && socket.isOpen())
            return;

        // create connection using thrift
        String location = getLocation();
        socket = new TSocket(location, ConfigHelper.getInputRpcPort(conf));
        TTransport transport = ConfigHelper.getInputTransportFactory(conf).openTransport(socket);
        TBinaryProtocol binaryProtocol = new TBinaryProtocol(transport);
        client = new Cassandra.Client(binaryProtocol);

        // log in
        client.set_keyspace(keyspace);
        if (ConfigHelper.getInputKeyspaceUserName(conf) != null) {
            Map<String, String> creds = new HashMap<String, String>();
            creds.put(IAuthenticator.USERNAME_KEY, ConfigHelper.getInputKeyspaceUserName(conf));
            creds.put(IAuthenticator.PASSWORD_KEY, ConfigHelper.getInputKeyspacePassword(conf));
            AuthenticationRequest authRequest = new AuthenticationRequest(creds);
            client.login(authRequest);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    iter = widerows ? new WideRowIterator() : new StaticRowIterator();
    logger.debug("created {}", iter);
}
From source file:grakn.core.server.session.reader.GraknCqlBridgeRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE) ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try {
        if (cluster != null) {
            return;
        }
        // create a Cluster instance
        String[] locations = split.getLocations();
        // A previous implementation of this class instantiated a new Cluster with the comment
        // "disregard the conf as it brings some unforeseen issues":
        //     Cluster.builder().addContactPoints(locations).build();
        // That ignores the config, so it cannot be used when the default ports need to change,
        // as they would not be propagated correctly. The Cluster is therefore now created from conf.
        // If this keeps breaking, it may need further investigation.
        cluster = CqlConfigHelper.getInputCluster(ConfigHelper.getInputInitialAddress(conf).split(","), conf);
    } catch (Exception e) {
        throw new RuntimeException(
                "Unable to create cluster for table: " + cfName + ", in keyspace: " + keyspace, e);
    }

    // the cluster should now point at a valid cluster
    session = cluster.connect(quote(keyspace));
    Preconditions.checkState(session != null, "Can't create connection session");

    // get the negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion().toInt();

    // If the user provides a CQL query, use it without validation;
    // otherwise fall back to building a query from inputColumns and whereClauses.
    cqlQuery = CqlConfigHelper.getInputCql(conf);

    // validate that the user hasn't supplied a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery)
            && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    log.trace("cqlQuery {}", cqlQuery);

    distinctKeyIterator = new DistinctKeyIterator();
    log.trace("created {}", distinctKeyIterator);
}
From source file:org.apache.hadoop.hive.cassandra.input.ColumnFamilyWideRowRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = context.getConfiguration();
    predicate = ConfigHelper.getInputSlicePredicate(conf);
    if (!isSliceRangePredicate(predicate)) {
        throw new AssertionError("WideRows require a slice range");
    }
    totalRowCount = ConfigHelper.getInputSplitSize(conf);
    Log.info("total rows = " + totalRowCount);
    batchRowCount = 1;
    rowPageSize = predicate.getSlice_range().getCount();
    startSlicePredicate = predicate.getSlice_range().start;
    cfName = ConfigHelper.getInputColumnFamily(conf);
    consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getReadConsistencyLevel(conf));
    keyspace = ConfigHelper.getInputKeyspace(conf);

    try {
        // only need to connect once
        if (socket != null && socket.isOpen()) {
            return;
        }

        // create connection using thrift
        String location = getLocation();
        socket = new TSocket(location, ConfigHelper.getInputRpcPort(conf));
        TBinaryProtocol binaryProtocol = new TBinaryProtocol(new TFramedTransport(socket));
        client = new Cassandra.Client(binaryProtocol);
        socket.open();

        // log in
        client.set_keyspace(keyspace);
        if (ConfigHelper.getInputKeyspaceUserName(conf) != null) {
            Map<String, String> creds = new HashMap<String, String>();
            creds.put(IAuthenticator.USERNAME_KEY, ConfigHelper.getInputKeyspaceUserName(conf));
            creds.put(IAuthenticator.PASSWORD_KEY, ConfigHelper.getInputKeyspacePassword(conf));
            AuthenticationRequest authRequest = new AuthenticationRequest(creds);
            client.login(authRequest);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    iter = new WideRowIterator();
}
From source file:org.janusgraph.hadoop.formats.cassandra.CqlBridgeRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE) ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try {
        if (cluster != null) {
            return;
        }
        // create a Cluster instance
        String[] locations = split.getLocations();
        // cluster = CqlConfigHelper.getInputCluster(locations, conf);
        // disregard the conf as it brings some unforeseen issues
        cluster = Cluster.builder().addContactPoints(locations).build();
    } catch (Exception e) {
        throw new RuntimeException(
                "Unable to create cluster for table: " + cfName + ", in keyspace: " + keyspace, e);
    }

    // the cluster should now point at a valid cluster
    session = cluster.connect(quote(keyspace));
    Preconditions.checkState(session != null, "Can't create connection session");

    // get the negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion().toInt();

    // If the user provides a CQL query, use it without validation;
    // otherwise fall back to building a query from inputColumns and whereClauses.
    cqlQuery = CqlConfigHelper.getInputCql(conf);

    // validate that the user hasn't supplied a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery)
            && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    log.trace("cqlQuery {}", cqlQuery);

    distinctKeyIterator = new DistinctKeyIterator();
    log.trace("created {}", distinctKeyIterator);
}