List of usage examples for org.apache.cassandra.hadoop ConfigHelper getInputColumnFamily
public static String getInputColumnFamily(Configuration conf)
From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java
License:Apache License
private static void validateConfiguration(Configuration conf) { if (ConfigHelper.getInputKeyspace(conf) == null || ConfigHelper.getInputColumnFamily(conf) == null) { throw new UnsupportedOperationException( "you must set the keyspace and columnfamily with setColumnFamily()"); }// w w w . java2 s . c o m if (ConfigHelper.getInputSlicePredicate(conf) == null) { throw new UnsupportedOperationException("you must set the predicate with setPredicate"); } if (ConfigHelper.getInputInitialAddress(conf) == null) throw new UnsupportedOperationException("You must set the initial output address to a Cassandra node"); if (ConfigHelper.getInputPartitioner(conf) == null) throw new UnsupportedOperationException("You must set the Cassandra partitioner class"); }
From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java
License:Apache License
public List<InputSplit> getSplits(JobContext context) throws IOException { Configuration conf = context.getConfiguration(); validateConfiguration(conf);// w w w . j a va 2 s. co m // cannonical ranges and nodes holding replicas List<TokenRange> masterRangeNodes = getRangeMap(conf); keyspace = ConfigHelper.getInputKeyspace(context.getConfiguration()); cfName = ConfigHelper.getInputColumnFamily(context.getConfiguration()); partitioner = ConfigHelper.getInputPartitioner(context.getConfiguration()); logger.debug("partitioner is " + partitioner); // cannonical ranges, split into pieces, fetching the splits in parallel ExecutorService executor = Executors.newCachedThreadPool(); List<InputSplit> splits = new ArrayList<InputSplit>(); try { List<Future<List<InputSplit>>> splitfutures = new ArrayList<Future<List<InputSplit>>>(); KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf); Range<Token> jobRange = null; if (jobKeyRange != null && jobKeyRange.start_token != null) { assert partitioner .preservesOrder() : "ConfigHelper.setInputKeyRange(..) can only be used with a order preserving paritioner"; assert jobKeyRange.start_key == null : "only start_token supported"; assert jobKeyRange.end_key == null : "only end_token supported"; jobRange = new Range<Token>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token), partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner); } for (TokenRange range : masterRangeNodes) { if (jobRange == null) { // for each range, pick a live owner and ask it to compute bite-sized splits splitfutures.add(executor.submit(new SplitCallable(range, conf))); } else { Range<Token> dhtRange = new Range<Token>( partitioner.getTokenFactory().fromString(range.start_token), partitioner.getTokenFactory().fromString(range.end_token), partitioner); if (dhtRange.intersects(jobRange)) { for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) { range.start_token = partitioner.getTokenFactory().toString(intersection.left); range.end_token = partitioner.getTokenFactory().toString(intersection.right); // for each range, pick a live owner and ask it to compute bite-sized splits splitfutures.add(executor.submit(new SplitCallable(range, conf))); } } } } // wait until we have all the results back for (Future<List<InputSplit>> futureInputSplits : splitfutures) { try { splits.addAll(futureInputSplits.get()); } catch (Exception e) { throw new IOException("Could not get input splits", e); } } } finally { executor.shutdownNow(); } assert splits.size() > 0; Collections.shuffle(splits, new Random(System.nanoTime())); return splits; }
From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyRecordReader.java
License:Apache License
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException { this.split = (ColumnFamilySplit) split; Configuration conf = context.getConfiguration(); KeyRange jobRange = ConfigHelper.getInputKeyRange(conf); filter = jobRange == null ? null : jobRange.row_filter; predicate = ConfigHelper.getInputSlicePredicate(conf); boolean widerows = ConfigHelper.getInputIsWide(conf); isEmptyPredicate = isEmptyPredicate(predicate); totalRowCount = ConfigHelper.getInputSplitSize(conf); batchSize = ConfigHelper.getRangeBatchSize(conf); cfName = ConfigHelper.getInputColumnFamily(conf); consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getReadConsistencyLevel(conf)); keyspace = ConfigHelper.getInputKeyspace(conf); try {//from w w w. j a va 2 s. co m // only need to connect once if (socket != null && socket.isOpen()) return; // create connection using thrift String location = getLocation(); socket = new TSocket(location, ConfigHelper.getInputRpcPort(conf)); TTransport transport = ConfigHelper.getInputTransportFactory(conf).openTransport(socket); TBinaryProtocol binaryProtocol = new TBinaryProtocol(transport); client = new Cassandra.Client(binaryProtocol); // log in client.set_keyspace(keyspace); if (ConfigHelper.getInputKeyspaceUserName(conf) != null) { Map<String, String> creds = new HashMap<String, String>(); creds.put(IAuthenticator.USERNAME_KEY, ConfigHelper.getInputKeyspaceUserName(conf)); creds.put(IAuthenticator.PASSWORD_KEY, ConfigHelper.getInputKeyspacePassword(conf)); AuthenticationRequest authRequest = new AuthenticationRequest(creds); client.login(authRequest); } } catch (Exception e) { throw new RuntimeException(e); } iter = widerows ? new WideRowIterator() : new StaticRowIterator(); logger.debug("created {}", iter); }
From source file:grakn.core.server.session.reader.GraknCqlBridgeRecordReader.java
License:Open Source License
@Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException { this.split = (ColumnFamilySplit) split; Configuration conf = HadoopCompat.getConfiguration(context); totalRowCount = (this.split.getLength() < Long.MAX_VALUE) ? (int) this.split.getLength() : ConfigHelper.getInputSplitSize(conf); cfName = ConfigHelper.getInputColumnFamily(conf); keyspace = ConfigHelper.getInputKeyspace(conf); partitioner = ConfigHelper.getInputPartitioner(conf); inputColumns = CqlConfigHelper.getInputcolumns(conf); userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf); try {// w w w . j ava 2 s . c o m if (cluster != null) { return; } // create a Cluster instance String[] locations = split.getLocations(); // Previous implementation of this class was instantiating a new Clutser with the following comment: // "disregard the conf as it brings some unforeseen issues." // Cluster.builder().addContactPoints(locations).build(); // The above ignores the config so it's not possible to use it when we need to change default ports // as they won't be correctly propagated. So now we create Cluster using conf. // If this keeps breaking we might need to investigate further. cluster = CqlConfigHelper.getInputCluster(ConfigHelper.getInputInitialAddress(conf).split(","), conf); } catch (Exception e) { throw new RuntimeException( "Unable to create cluster for table: " + cfName + ", in keyspace: " + keyspace, e); } // cluster should be represent to a valid cluster now session = cluster.connect(quote(keyspace)); Preconditions.checkState(session != null, "Can't create connection session"); //get negotiated serialization protocol nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion().toInt(); // If the user provides a CQL query then we will use it without validation // otherwise we will fall back to building a query using the: // inputColumns // whereClauses cqlQuery = CqlConfigHelper.getInputCql(conf); // validate that the user hasn't tried to give us a custom query along with input columns // and where clauses if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) { throw new AssertionError("Cannot define a custom query with input columns and / or where clauses"); } if (StringUtils.isEmpty(cqlQuery)) { cqlQuery = buildQuery(); } log.trace("cqlQuery {}", cqlQuery); distinctKeyIterator = new DistinctKeyIterator(); log.trace("created {}", distinctKeyIterator); }
From source file:org.apache.hadoop.hive.cassandra.input.ColumnFamilyWideRowRecordReader.java
License:Apache License
@Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException { this.split = (ColumnFamilySplit) split; Configuration conf = context.getConfiguration(); predicate = ConfigHelper.getInputSlicePredicate(conf); if (!isSliceRangePredicate(predicate)) { throw new AssertionError("WideRowsRequire a slice range"); }/* w w w .ja v a2 s . c om*/ totalRowCount = ConfigHelper.getInputSplitSize(conf); Log.info("total rows = " + totalRowCount); batchRowCount = 1; rowPageSize = predicate.getSlice_range().getCount(); startSlicePredicate = predicate.getSlice_range().start; cfName = ConfigHelper.getInputColumnFamily(conf); consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getReadConsistencyLevel(conf)); keyspace = ConfigHelper.getInputKeyspace(conf); try { // only need to connect once if (socket != null && socket.isOpen()) { return; } // create connection using thrift String location = getLocation(); socket = new TSocket(location, ConfigHelper.getInputRpcPort(conf)); TBinaryProtocol binaryProtocol = new TBinaryProtocol(new TFramedTransport(socket)); client = new Cassandra.Client(binaryProtocol); socket.open(); // log in client.set_keyspace(keyspace); if (ConfigHelper.getInputKeyspaceUserName(conf) != null) { Map<String, String> creds = new HashMap<String, String>(); creds.put(IAuthenticator.USERNAME_KEY, ConfigHelper.getInputKeyspaceUserName(conf)); creds.put(IAuthenticator.PASSWORD_KEY, ConfigHelper.getInputKeyspacePassword(conf)); AuthenticationRequest authRequest = new AuthenticationRequest(creds); client.login(authRequest); } } catch (Exception e) { throw new RuntimeException(e); } iter = new WideRowIterator(); }
From source file:org.janusgraph.hadoop.formats.cassandra.CqlBridgeRecordReader.java
License:Apache License
@Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException { this.split = (ColumnFamilySplit) split; Configuration conf = HadoopCompat.getConfiguration(context); totalRowCount = (this.split.getLength() < Long.MAX_VALUE) ? (int) this.split.getLength() : ConfigHelper.getInputSplitSize(conf); cfName = ConfigHelper.getInputColumnFamily(conf); keyspace = ConfigHelper.getInputKeyspace(conf); partitioner = ConfigHelper.getInputPartitioner(conf); inputColumns = CqlConfigHelper.getInputcolumns(conf); userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf); try {// w w w.j ava2s.c o m if (cluster != null) { return; } // create a Cluster instance String[] locations = split.getLocations(); // cluster = CqlConfigHelper.getInputCluster(locations, conf); // disregard the conf as it brings some unforeseen issues. cluster = Cluster.builder().addContactPoints(locations).build(); } catch (Exception e) { throw new RuntimeException( "Unable to create cluster for table: " + cfName + ", in keyspace: " + keyspace, e); } // cluster should be represent to a valid cluster now session = cluster.connect(quote(keyspace)); Preconditions.checkState(session != null, "Can't create connection session"); //get negotiated serialization protocol nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion().toInt(); // If the user provides a CQL query then we will use it without validation // otherwise we will fall back to building a query using the: // inputColumns // whereClauses cqlQuery = CqlConfigHelper.getInputCql(conf); // validate that the user hasn't tried to give us a custom query along with input columns // and where clauses if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) { throw new AssertionError("Cannot define a custom query with input columns and / or where clauses"); } if (StringUtils.isEmpty(cqlQuery)) { cqlQuery = buildQuery(); } log.trace("cqlQuery {}", cqlQuery); distinctKeyIterator = new DistinctKeyIterator(); log.trace("created {}", distinctKeyIterator); }