Usage examples for org.apache.cassandra.hadoop.ConfigHelper.setInputColumnFamily
public static void setInputColumnFamily(Configuration conf, String keyspace, String columnFamily)
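For orientation before the full examples, here is a minimal sketch of the call in isolation (imports omitted, matching the listings below). The keyspace and column family names are placeholders, not values from the examples. setInputColumnFamily only records the keyspace and column family in the job configuration; ColumnFamilyInputFormat also requires a contact point, partitioner, and slice predicate, which every example below sets as well.

    Configuration conf = job.getConfiguration();
    ConfigHelper.setInputColumnFamily(conf, "wordcount_keyspace", "input_words"); // placeholder names
    // The input format also needs a contact point, partitioner, and predicate:
    ConfigHelper.setInputInitialAddress(conf, "localhost");
    ConfigHelper.setInputRpcPort(conf, "9160");
    ConfigHelper.setInputPartitioner(conf, "org.apache.cassandra.dht.RandomPartitioner");
    ConfigHelper.setInputSlicePredicate(conf, new SlicePredicate()
            .setSlice_range(new SliceRange()
                    .setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER)
                    .setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER)
                    .setCount(100)));
    job.setInputFormatClass(ColumnFamilyInputFormat.class);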
From source file:com.impetus.code.examples.hadoop.cassandra.wordcount.WordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    String outputReducerType = "cassandra";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) {
        String columnName = "text" + i;
        getConf().set(CONF_COLUMN_NAME, columnName);

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);
        ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, INPUT_COLUMN_FAMILY);

        // Read only the single column written for this iteration.
        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        job.waitForCompletion(true);
    }
    return 0;
}
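The job above configures the input but does not show the mapper that consumes it. As a hedged sketch (imports omitted, matching the listings here), assuming the pre-2.0 Thrift column API (IColumn) these examples target: ColumnFamilyInputFormat delivers each row as its row key plus the columns matched by the slice predicate. The sourceColumn field is a hypothetical stand-in for the column name stored under CONF_COLUMN_NAME.

    public static class TokenizerMapper
            extends Mapper<ByteBuffer, SortedMap<ByteBuffer, IColumn>, Text, IntWritable> {

        private static final IntWritable ONE = new IntWritable(1);
        private ByteBuffer sourceColumn; // hypothetical: loaded from CONF_COLUMN_NAME in setup()

        @Override
        public void map(ByteBuffer key, SortedMap<ByteBuffer, IColumn> columns, Context context)
                throws IOException, InterruptedException {
            // Rows arrive keyed by row key; values are the columns matching the slice predicate.
            IColumn column = columns.get(sourceColumn);
            if (column == null)
                return;
            String value = ByteBufferUtil.string(column.value());
            for (String word : value.split("\\s+"))
                context.write(new Text(word), ONE);
        }
    }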
From source file:com.impetus.code.examples.hadoop.cassandra.wordcount.WordCountCounters.java
License:Apache License
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "wordcountcounters");
    job.setJarByClass(WordCountCounters.class);
    job.setMapperClass(SumMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));

    job.setInputFormatClass(ColumnFamilyInputFormat.class);
    ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE,
            WordCountCounters.COUNTER_COLUMN_FAMILY);

    // An empty start/finish slice range selects all columns, up to 100 per row.
    SlicePredicate predicate = new SlicePredicate()
            .setSlice_range(new SliceRange().setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER)
                    .setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER).setCount(100));
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

    job.waitForCompletion(true);
    return 0;
}
From source file:net.orpiske.tcs.wc.main.Main.java
License:Apache License
/**
 * Set up the M/R job to read from the references table in Cassandra.
 *
 * @param configuration the Hadoop job configuration
 */
private void inputConfiguration(Configuration configuration) {
    ConfigHelper.setInputRpcPort(configuration, DB_PORT);
    ConfigHelper.setInputInitialAddress(configuration, DB_HOST);
    ConfigHelper.setInputPartitioner(configuration, PARTITIONER);
    ConfigHelper.setInputColumnFamily(configuration, KEYSPACE, INPUT_TABLE);

    // Read only the two columns the job needs.
    List<ByteBuffer> columns = Arrays.asList(ByteBufferUtil.bytes("reference_text"),
            ByteBufferUtil.bytes("domain"));
    SlicePredicate predicate = new SlicePredicate().setColumn_names(columns);
    ConfigHelper.setInputSlicePredicate(configuration, predicate);
}
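Unlike the first two examples, this one uses the input-prefixed setters (setInputRpcPort, setInputInitialAddress, setInputPartitioner), which scope the connection settings to the read side of the job; ConfigHelper provides matching setOutput* variants, so a job can read from one cluster and write to another.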
From source file:org.apache.hadoop.hive.cassandra.input.cql.HiveCqlInputFormat.java
License:Apache License
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    String ks = jobConf.get(AbstractCassandraSerDe.CASSANDRA_KEYSPACE_NAME);
    String cf = jobConf.get(AbstractCassandraSerDe.CASSANDRA_CF_NAME);
    int slicePredicateSize = jobConf.getInt(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_SIZE,
            AbstractCassandraSerDe.DEFAULT_SLICE_PREDICATE_SIZE);
    int sliceRangeSize = jobConf.getInt(AbstractCassandraSerDe.CASSANDRA_RANGE_BATCH_SIZE,
            AbstractCassandraSerDe.DEFAULT_RANGE_BATCH_SIZE);
    int splitSize = jobConf.getInt(AbstractCassandraSerDe.CASSANDRA_SPLIT_SIZE,
            AbstractCassandraSerDe.DEFAULT_SPLIT_SIZE);
    String cassandraColumnMapping = jobConf.get(AbstractCassandraSerDe.CASSANDRA_COL_MAPPING);
    int rpcPort = jobConf.getInt(AbstractCassandraSerDe.CASSANDRA_PORT, 9160);
    String host = jobConf.get(AbstractCassandraSerDe.CASSANDRA_HOST);
    String partitioner = jobConf.get(AbstractCassandraSerDe.CASSANDRA_PARTITIONER);

    if (cassandraColumnMapping == null) {
        throw new IOException("cassandra.columns.mapping required for Cassandra Table.");
    }

    // Select every column, up to slicePredicateSize per row.
    SliceRange range = new SliceRange();
    range.setStart(new byte[0]);
    range.setFinish(new byte[0]);
    range.setReversed(false);
    range.setCount(slicePredicateSize);
    SlicePredicate predicate = new SlicePredicate();
    predicate.setSlice_range(range);

    ConfigHelper.setInputRpcPort(jobConf, "" + rpcPort);
    ConfigHelper.setInputInitialAddress(jobConf, host);
    ConfigHelper.setInputPartitioner(jobConf, partitioner);
    ConfigHelper.setInputSlicePredicate(jobConf, predicate);
    ConfigHelper.setInputColumnFamily(jobConf, ks, cf);
    ConfigHelper.setRangeBatchSize(jobConf, sliceRangeSize);
    ConfigHelper.setInputSplitSize(jobConf, splitSize);

    Job job = new Job(jobConf);
    JobContext jobContext = new JobContext(job.getConfiguration(), job.getJobID());
    Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

    // Wrap each Cassandra split in a Hive-aware split carrying the connection details.
    List<org.apache.hadoop.mapreduce.InputSplit> splits = getSplits(jobContext);
    InputSplit[] results = new InputSplit[splits.size()];
    for (int i = 0; i < splits.size(); ++i) {
        HiveCassandraStandardSplit csplit = new HiveCassandraStandardSplit(
                (ColumnFamilySplit) splits.get(i), cassandraColumnMapping, tablePaths[0]);
        csplit.setKeyspace(ks);
        csplit.setColumnFamily(cf);
        csplit.setRangeBatchSize(sliceRangeSize);
        csplit.setSplitSize(splitSize);
        csplit.setHost(host);
        csplit.setPort(rpcPort);
        csplit.setSlicePredicateSize(slicePredicateSize);
        csplit.setPartitioner(partitioner);
        csplit.setColumnMapping(cassandraColumnMapping);
        results[i] = csplit;
    }
    return results;
}
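None of the examples above use it, but ConfigHelper also offers a four-argument overload, setInputColumnFamily(conf, keyspace, columnFamily, widerows). As a hedged sketch with hypothetical keyspace and table names: passing true requests wide-row support from ColumnFamilyInputFormat, which pages through a row's columns rather than requiring them all to fit under a single slice-predicate count.

    // Hypothetical names; 'true' requests wide-row paging on the input side.
    ConfigHelper.setInputColumnFamily(jobConf, "big_keyspace", "wide_events", true);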