List of usage examples for org.apache.cassandra.hadoop.ConfigHelper#getInputSplitSize
public static int getInputSplitSize(Configuration conf)
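Before the per-project examples, a minimal sketch of how the value returned by getInputSplitSize is typically configured on the job side. It assumes the companion setter ConfigHelper.setInputSplitSize(Configuration, int) from the same class; the 128 * 1024 row target is an arbitrary illustrative value, and the library default (64k rows in the Thrift-era ConfigHelper) applies when nothing is set.

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.hadoop.conf.Configuration;

public class SplitSizeConfigExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Target number of rows per input split; this drives how many
        // map tasks the input format creates for the table scan.
        ConfigHelper.setInputSplitSize(conf, 128 * 1024); // illustrative value

        // Record readers such as the ones below call this getter to size
        // their row iteration; it falls back to the library default when
        // no value has been set on the Configuration.
        int splitSize = ConfigHelper.getInputSplitSize(conf);
        System.out.println("rows per split: " + splitSize);
    }
}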
From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java
License:Apache License
private List<String> getSubSplits(String keyspace, String cfName, TokenRange range, Configuration conf)
        throws IOException {
    int splitsize = ConfigHelper.getInputSplitSize(conf);
    for (int i = 0; i < range.rpc_endpoints.size(); i++) {
        String host = range.rpc_endpoints.get(i);

        if (host == null || host.equals("0.0.0.0"))
            host = range.endpoints.get(i);

        try {
            Cassandra.Client client = ConfigHelper.createConnection(conf, host,
                    ConfigHelper.getInputRpcPort(conf));
            client.set_keyspace(keyspace);
            return client.describe_splits(cfName, range.start_token, range.end_token, splitsize);
        } catch (IOException e) {
            logger.debug("failed connect to endpoint " + host, e);
        } catch (TException e) {
            throw new RuntimeException(e);
        } catch (InvalidRequestException e) {
            throw new RuntimeException(e);
        }
    }
    throw new IOException("failed connecting to all endpoints " + StringUtils.join(range.endpoints, ","));
}
From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyRecordReader.java
License:Apache License
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = context.getConfiguration();
    KeyRange jobRange = ConfigHelper.getInputKeyRange(conf);
    filter = jobRange == null ? null : jobRange.row_filter;
    predicate = ConfigHelper.getInputSlicePredicate(conf);
    boolean widerows = ConfigHelper.getInputIsWide(conf);
    isEmptyPredicate = isEmptyPredicate(predicate);
    totalRowCount = ConfigHelper.getInputSplitSize(conf);
    batchSize = ConfigHelper.getRangeBatchSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getReadConsistencyLevel(conf));
    keyspace = ConfigHelper.getInputKeyspace(conf);

    try {
        // only need to connect once
        if (socket != null && socket.isOpen())
            return;

        // create connection using thrift
        String location = getLocation();
        socket = new TSocket(location, ConfigHelper.getInputRpcPort(conf));
        TTransport transport = ConfigHelper.getInputTransportFactory(conf).openTransport(socket);
        TBinaryProtocol binaryProtocol = new TBinaryProtocol(transport);
        client = new Cassandra.Client(binaryProtocol);

        // log in
        client.set_keyspace(keyspace);
        if (ConfigHelper.getInputKeyspaceUserName(conf) != null) {
            Map<String, String> creds = new HashMap<String, String>();
            creds.put(IAuthenticator.USERNAME_KEY, ConfigHelper.getInputKeyspaceUserName(conf));
            creds.put(IAuthenticator.PASSWORD_KEY, ConfigHelper.getInputKeyspacePassword(conf));
            AuthenticationRequest authRequest = new AuthenticationRequest(creds);
            client.login(authRequest);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    iter = widerows ? new WideRowIterator() : new StaticRowIterator();
    logger.debug("created {}", iter);
}
From source file:grakn.core.server.session.reader.GraknCqlBridgeRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE) ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try {
        if (cluster != null) {
            return;
        }
        // create a Cluster instance
        String[] locations = split.getLocations();
        // A previous implementation of this class instantiated a new Cluster with the comment
        // "disregard the conf as it brings some unforeseen issues":
        //     Cluster.builder().addContactPoints(locations).build();
        // That ignores the config, so it cannot be used when the default ports need to change,
        // as they would not be propagated correctly. The Cluster is therefore now created from conf.
        // If this keeps breaking, it may need further investigation.
        cluster = CqlConfigHelper.getInputCluster(ConfigHelper.getInputInitialAddress(conf).split(","), conf);
    } catch (Exception e) {
        throw new RuntimeException(
                "Unable to create cluster for table: " + cfName + ", in keyspace: " + keyspace, e);
    }

    // the cluster should now point at a valid cluster
    session = cluster.connect(quote(keyspace));
    Preconditions.checkState(session != null, "Can't create connection session");

    // get the negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion().toInt();

    // If the user provides a CQL query, use it without validation;
    // otherwise fall back to building a query from inputColumns and whereClauses.
    cqlQuery = CqlConfigHelper.getInputCql(conf);

    // validate that the user hasn't supplied a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery)
            && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    log.trace("cqlQuery {}", cqlQuery);

    distinctKeyIterator = new DistinctKeyIterator();
    log.trace("created {}", distinctKeyIterator);
}
From source file:org.apache.hadoop.hive.cassandra.input.ColumnFamilyWideRowRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = context.getConfiguration();
    predicate = ConfigHelper.getInputSlicePredicate(conf);
    if (!isSliceRangePredicate(predicate)) {
        throw new AssertionError("WideRows require a slice range");
    }
    totalRowCount = ConfigHelper.getInputSplitSize(conf);
    Log.info("total rows = " + totalRowCount);
    batchRowCount = 1;
    rowPageSize = predicate.getSlice_range().getCount();
    startSlicePredicate = predicate.getSlice_range().start;
    cfName = ConfigHelper.getInputColumnFamily(conf);
    consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getReadConsistencyLevel(conf));
    keyspace = ConfigHelper.getInputKeyspace(conf);

    try {
        // only need to connect once
        if (socket != null && socket.isOpen()) {
            return;
        }

        // create connection using thrift
        String location = getLocation();
        socket = new TSocket(location, ConfigHelper.getInputRpcPort(conf));
        TBinaryProtocol binaryProtocol = new TBinaryProtocol(new TFramedTransport(socket));
        client = new Cassandra.Client(binaryProtocol);
        socket.open();

        // log in
        client.set_keyspace(keyspace);
        if (ConfigHelper.getInputKeyspaceUserName(conf) != null) {
            Map<String, String> creds = new HashMap<String, String>();
            creds.put(IAuthenticator.USERNAME_KEY, ConfigHelper.getInputKeyspaceUserName(conf));
            creds.put(IAuthenticator.PASSWORD_KEY, ConfigHelper.getInputKeyspacePassword(conf));
            AuthenticationRequest authRequest = new AuthenticationRequest(creds);
            client.login(authRequest);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    iter = new WideRowIterator();
}
From source file:org.janusgraph.hadoop.formats.cassandra.CqlBridgeRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE) ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try {
        if (cluster != null) {
            return;
        }
        // create a Cluster instance
        String[] locations = split.getLocations();
        // cluster = CqlConfigHelper.getInputCluster(locations, conf);
        // disregard the conf as it brings some unforeseen issues
        cluster = Cluster.builder().addContactPoints(locations).build();
    } catch (Exception e) {
        throw new RuntimeException(
                "Unable to create cluster for table: " + cfName + ", in keyspace: " + keyspace, e);
    }

    // the cluster should now point at a valid cluster
    session = cluster.connect(quote(keyspace));
    Preconditions.checkState(session != null, "Can't create connection session");

    // get the negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion().toInt();

    // If the user provides a CQL query, use it without validation;
    // otherwise fall back to building a query from inputColumns and whereClauses.
    cqlQuery = CqlConfigHelper.getInputCql(conf);

    // validate that the user hasn't supplied a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery)
            && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    log.trace("cqlQuery {}", cqlQuery);

    distinctKeyIterator = new DistinctKeyIterator();
    log.trace("created {}", distinctKeyIterator);
}