List of usage examples for org.apache.cassandra.hadoop ConfigHelper setRangeBatchSize
public static void setRangeBatchSize(Configuration conf, int batchsize)
From source file:WordCount.java
License:Apache License
public int run(String[] args) throws Exception { ///start/* w ww . ja v a 2s . co m*/ final long startTime = System.currentTimeMillis(); String outputReducerType = "filesystem"; if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) { String[] s = args[0].split("="); if (s != null && s.length == 2) outputReducerType = s[1]; } logger.info("output reducer type: " + outputReducerType); // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better ConfigHelper.setRangeBatchSize(getConf(), 99); for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) { String columnName = "userId"; Job job = new Job(getConf(), "wordcount"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); if (outputReducerType.equalsIgnoreCase("filesystem")) { job.setReducerClass(ReducerToFilesystem.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i)); } else { job.setReducerClass(ReducerToCassandra.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(ByteBuffer.class); job.setOutputValueClass(List.class); job.setOutputFormatClass(ColumnFamilyOutputFormat.class); ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY); job.getConfiguration().set(CONF_COLUMN_NAME, "sum"); } job.setInputFormatClass(ColumnFamilyInputFormat.class); ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost"); //Change partitioner here ConfigHelper.setInputPartitioner(job.getConfiguration(), "RandomPartitioner"); ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY); SlicePredicate predicate = new SlicePredicate() .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName))); ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate); // this will cause the predicate to be ignored in favor of scanning everything as a wide row //Son degisiklik Super Column Support ? // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setOutputPartitioner(job.getConfiguration(), "RandomPartitioner"); job.waitForCompletion(true); } final double duration = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println(); System.out.println("Job Finished in " + duration + " seconds"); System.out.println(); return 0; }
From source file:WordCount.java
License:Apache License
public int run(String[] args) throws Exception { ///start//from w w w .j av a2 s . c o m final long startTime = System.currentTimeMillis(); String outputReducerType = "filesystem"; if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) { String[] s = args[0].split("="); if (s != null && s.length == 2) outputReducerType = s[1]; } logger.info("output reducer type: " + outputReducerType); // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better ConfigHelper.setRangeBatchSize(getConf(), 99); for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) { String columnName = "userId"; Job job = new Job(getConf(), "wordcount"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); if (outputReducerType.equalsIgnoreCase("filesystem")) { job.setReducerClass(ReducerToFilesystem.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i)); } else { job.setReducerClass(ReducerToCassandra.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(ByteBuffer.class); job.setOutputValueClass(List.class); job.setOutputFormatClass(ColumnFamilyOutputFormat.class); ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY); job.getConfiguration().set(CONF_COLUMN_NAME, "sum"); } job.setInputFormatClass(ColumnFamilyInputFormat.class); ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setInputPartitioner(job.getConfiguration(), "RandomPartitioner"); ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY); SlicePredicate predicate = new SlicePredicate() .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName))); ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate); // this will cause the predicate to be ignored in favor of scanning everything as a wide row //Son degisiklik // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true); //System.out.println("tessssssaaat"); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setOutputPartitioner(job.getConfiguration(), "RandomPartitioner"); job.waitForCompletion(true); } //print final double duration = (System.currentTimeMillis() - startTime) / 1000.0; // after System.out.println(); System.out.println("Job Finished in " + duration + " seconds"); System.out.println(); return 0; }
From source file:WordCount.java
License:Apache License
public int run(String[] args) throws Exception { ///start//from w ww . jav a 2 s .c om final long startTime = System.currentTimeMillis(); String outputReducerType = "filesystem"; if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) { String[] s = args[0].split("="); if (s != null && s.length == 2) outputReducerType = s[1]; } logger.info("output reducer type: " + outputReducerType); // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better ConfigHelper.setRangeBatchSize(getConf(), 99); for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) { String columnName = "userId"; Job job = new Job(getConf(), "wordcount"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); if (outputReducerType.equalsIgnoreCase("filesystem")) { job.setReducerClass(ReducerToFilesystem.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i)); } else { job.setReducerClass(ReducerToCassandra.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(ByteBuffer.class); job.setOutputValueClass(List.class); job.setOutputFormatClass(ColumnFamilyOutputFormat.class); ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY); job.getConfiguration().set(CONF_COLUMN_NAME, "sum"); } job.setInputFormatClass(ColumnFamilyInputFormat.class); ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setInputPartitioner(job.getConfiguration(), "RandomPartitioner"); ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY); SlicePredicate predicate = new SlicePredicate() .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName))); ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate); // this will cause the predicate to be ignored in favor of scanning everything as a wide row //Son degisiklik // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true); //System.out.println("tessssssaaat"); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setOutputPartitioner(job.getConfiguration(), "RandomPartitioner"); job.waitForCompletion(true); } //print final double duration = (System.currentTimeMillis() - startTime) / 1000.0; // after System.out.println(); System.out.println("Job Finished in " + duration + " seconds"); System.out.println(); return 0; }
From source file:WordCount.java
License:Apache License
public int run(String[] args) throws Exception { ///start// w ww.ja v a 2 s .co m final long startTime = System.currentTimeMillis(); String outputReducerType = "filesystem"; if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) { String[] s = args[0].split("="); if (s != null && s.length == 2) outputReducerType = s[1]; } logger.info("output reducer type: " + outputReducerType); // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better ConfigHelper.setRangeBatchSize(getConf(), 99); for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) { String columnName = "userId"; Job job = new Job(getConf(), "wordcount"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); //System.out.println("test"); if (outputReducerType.equalsIgnoreCase("filesystem")) { job.setReducerClass(ReducerToFilesystem.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i)); } else { job.setReducerClass(ReducerToCassandra.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(ByteBuffer.class); job.setOutputValueClass(List.class); job.setOutputFormatClass(ColumnFamilyOutputFormat.class); ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY); job.getConfiguration().set(CONF_COLUMN_NAME, "sum"); } job.setInputFormatClass(ColumnFamilyInputFormat.class); ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setInputPartitioner(job.getConfiguration(), "RandomPartitioner"); ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY); SlicePredicate predicate = new SlicePredicate() .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName))); ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate); // this will cause the predicate to be ignored in favor of scanning everything as a wide row //Son degisiklik // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setOutputPartitioner(job.getConfiguration(), "RandomPartitioner"); job.waitForCompletion(true); } //print final double duration = (System.currentTimeMillis() - startTime) / 1000.0; // after System.out.println(); System.out.println("Job Finished in " + duration + " seconds"); System.out.println(); return 0; }
From source file:cassandra_mapreduce.MapReduceCassandraDB.java
License:GNU General Public License
public int run(String[] args) throws Exception { String columnName = "value"; getConf().set(CONF_COLUMN_NAME, columnName); getConf().set("mapred.job.tracker", args[0] + ":8021"); Job job = new Job(getConf(), "Phase1"); job.setJarByClass(MapReduceCassandraDB.class); job.setMapperClass(TokenizerMapper.class); job.setReducerClass(ReducerToCassandra.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(ByteBuffer.class); job.setOutputValueClass(List.class); job.setInputFormatClass(ColumnFamilyInputFormat.class); job.setOutputFormatClass(ColumnFamilyOutputFormat.class); ConfigHelper.setRangeBatchSize(job.getConfiguration(), 800); ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY); ConfigHelper.setRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInitialAddress(job.getConfiguration(), args[0]); ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner"); ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY); SlicePredicate predicate = new SlicePredicate() .setColumn_names(Arrays.asList(ByteBuffer.wrap(columnName.getBytes()))); ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate); job.waitForCompletion(true);//from w w w . j a va 2s. co m //Phase 2 Job job2 = new Job(getConf(), "Phase2"); job2.setJarByClass(MapReduceCassandraDB.class); job2.setMapperClass(Mapper2.class); job2.setReducerClass(Reducer2.class); job2.setMapOutputKeyClass(Text.class); job2.setMapOutputValueClass(IntWritable.class); job2.setOutputKeyClass(ByteBuffer.class); job2.setOutputValueClass(List.class); job2.setInputFormatClass(ColumnFamilyInputFormat.class); job2.setOutputFormatClass(ColumnFamilyOutputFormat.class); ConfigHelper.setOutputColumnFamily(job2.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY2); ConfigHelper.setRpcPort(job2.getConfiguration(), "9160"); ConfigHelper.setInitialAddress(job2.getConfiguration(), args[0]); ConfigHelper.setPartitioner(job2.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner"); ConfigHelper.setInputColumnFamily(job2.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY); SlicePredicate predicate2 = new SlicePredicate() .setColumn_names(Arrays.asList(ByteBuffer.wrap(columnName.getBytes()))); ConfigHelper.setInputSlicePredicate(job2.getConfiguration(), predicate2); job2.waitForCompletion(true); // job.setCombinerClass(IntSumReducer.class); // job.setReducerClass(IntSumReducer.class); // job.setOutputKeyClass(Text.class); // job.setOutputValueClass(Text.class); // // job.setInputFormatClass(ColumnFamilyInputFormat.class); // FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX)); // // ConfigHelper.setRpcPort(job.getConfiguration(), "9160"); // ConfigHelper.setInitialAddress(job.getConfiguration(), args[0]); // ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner"); // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY); // SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBuffer.wrap(columnName.getBytes()))); // ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate); // // job.waitForCompletion(true); return 0; }
From source file:org.apache.hadoop.hive.cassandra.input.cql.HiveCqlInputFormat.java
License:Apache License
@Override public RecordReader<MapWritableComparable, MapWritable> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException { HiveCassandraStandardSplit cassandraSplit = (HiveCassandraStandardSplit) split; List<String> columns = CqlSerDe.parseColumnMapping(cassandraSplit.getColumnMapping()); List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf); if (columns.size() < readColIDs.size()) { throw new IOException("Cannot read more columns than the given table contains."); }/*from w ww .j a va 2 s .co m*/ ColumnFamilySplit cfSplit = cassandraSplit.getSplit(); Job job = new Job(jobConf); TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) { @Override public void progress() { reporter.progress(); } }; SlicePredicate predicate = new SlicePredicate(); predicate.setColumn_names(getColumnNames(columns, readColIDs)); try { boolean wideRows = true; ConfigHelper.setInputColumnFamily(tac.getConfiguration(), cassandraSplit.getKeyspace(), cassandraSplit.getColumnFamily(), wideRows); ConfigHelper.setInputSlicePredicate(tac.getConfiguration(), predicate); ConfigHelper.setRangeBatchSize(tac.getConfiguration(), cassandraSplit.getRangeBatchSize()); ConfigHelper.setInputRpcPort(tac.getConfiguration(), cassandraSplit.getPort() + ""); ConfigHelper.setInputInitialAddress(tac.getConfiguration(), cassandraSplit.getHost()); ConfigHelper.setInputPartitioner(tac.getConfiguration(), cassandraSplit.getPartitioner()); // Set Split Size ConfigHelper.setInputSplitSize(tac.getConfiguration(), cassandraSplit.getSplitSize()); LOG.info("Validators : " + tac.getConfiguration().get(CassandraColumnSerDe.CASSANDRA_VALIDATOR_TYPE)); List<IndexExpression> indexExpr = parseFilterPredicate(jobConf); if (indexExpr != null) { //We have pushed down a filter from the Hive query, we can use this against secondary indexes ConfigHelper.setInputRange(tac.getConfiguration(), indexExpr); } CqlHiveRecordReader rr = new CqlHiveRecordReader(new CqlPagingRecordReader()); rr.initialize(cfSplit, tac); return rr; } catch (Exception ie) { throw new IOException(ie); } }
From source file:org.apache.hadoop.hive.cassandra.input.cql.HiveCqlInputFormat.java
License:Apache License
@Override public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException { String ks = jobConf.get(AbstractCassandraSerDe.CASSANDRA_KEYSPACE_NAME); String cf = jobConf.get(AbstractCassandraSerDe.CASSANDRA_CF_NAME); int slicePredicateSize = jobConf.getInt(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_SIZE, AbstractCassandraSerDe.DEFAULT_SLICE_PREDICATE_SIZE); int sliceRangeSize = jobConf.getInt(AbstractCassandraSerDe.CASSANDRA_RANGE_BATCH_SIZE, AbstractCassandraSerDe.DEFAULT_RANGE_BATCH_SIZE); int splitSize = jobConf.getInt(AbstractCassandraSerDe.CASSANDRA_SPLIT_SIZE, AbstractCassandraSerDe.DEFAULT_SPLIT_SIZE); String cassandraColumnMapping = jobConf.get(AbstractCassandraSerDe.CASSANDRA_COL_MAPPING); int rpcPort = jobConf.getInt(AbstractCassandraSerDe.CASSANDRA_PORT, 9160); String host = jobConf.get(AbstractCassandraSerDe.CASSANDRA_HOST); String partitioner = jobConf.get(AbstractCassandraSerDe.CASSANDRA_PARTITIONER); if (cassandraColumnMapping == null) { throw new IOException("cassandra.columns.mapping required for Cassandra Table."); }// ww w . j a v a 2 s . c o m SliceRange range = new SliceRange(); range.setStart(new byte[0]); range.setFinish(new byte[0]); range.setReversed(false); range.setCount(slicePredicateSize); SlicePredicate predicate = new SlicePredicate(); predicate.setSlice_range(range); ConfigHelper.setInputRpcPort(jobConf, "" + rpcPort); ConfigHelper.setInputInitialAddress(jobConf, host); ConfigHelper.setInputPartitioner(jobConf, partitioner); ConfigHelper.setInputSlicePredicate(jobConf, predicate); ConfigHelper.setInputColumnFamily(jobConf, ks, cf); ConfigHelper.setRangeBatchSize(jobConf, sliceRangeSize); ConfigHelper.setInputSplitSize(jobConf, splitSize); Job job = new Job(jobConf); JobContext jobContext = new JobContext(job.getConfiguration(), job.getJobID()); Path[] tablePaths = FileInputFormat.getInputPaths(jobContext); List<org.apache.hadoop.mapreduce.InputSplit> splits = getSplits(jobContext); InputSplit[] results = new InputSplit[splits.size()]; for (int i = 0; i < splits.size(); ++i) { HiveCassandraStandardSplit csplit = new HiveCassandraStandardSplit((ColumnFamilySplit) splits.get(i), cassandraColumnMapping, tablePaths[0]); csplit.setKeyspace(ks); csplit.setColumnFamily(cf); csplit.setRangeBatchSize(sliceRangeSize); csplit.setSplitSize(splitSize); csplit.setHost(host); csplit.setPort(rpcPort); csplit.setSlicePredicateSize(slicePredicateSize); csplit.setPartitioner(partitioner); csplit.setColumnMapping(cassandraColumnMapping); results[i] = csplit; } return results; }
From source file:org.apache.hadoop.hive.cassandra.input.HiveCassandraStandardColumnInputFormat.java
License:Apache License
@Override public RecordReader<BytesWritable, MapWritable> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException { HiveCassandraStandardSplit cassandraSplit = (HiveCassandraStandardSplit) split; List<String> columns = CassandraColumnSerDe.parseColumnMapping(cassandraSplit.getColumnMapping()); isTransposed = CassandraColumnSerDe.isTransposed(columns); List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf); if (columns.size() < readColIDs.size()) { throw new IOException("Cannot read more columns than the given table contains."); }/*w ww.j a v a 2s .c om*/ org.apache.cassandra.hadoop.ColumnFamilySplit cfSplit = cassandraSplit.getSplit(); Job job = new Job(jobConf); TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) { @Override public void progress() { reporter.progress(); } }; SlicePredicate predicate = new SlicePredicate(); if (isTransposed || readColIDs.size() == columns.size() || readColIDs.size() == 0) { SliceRange range = new SliceRange(); AbstractType comparator = BytesType.instance; String comparatorType = jobConf.get(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR); if (comparatorType != null && !comparatorType.equals("")) { try { comparator = TypeParser.parse(comparatorType); } catch (ConfigurationException ex) { throw new IOException("Comparator class not found."); } catch (SyntaxException e) { throw new IOException(e); } } String sliceStart = jobConf.get(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START); String sliceEnd = jobConf.get(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH); String reversed = jobConf.get(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED); range.setStart(comparator.fromString(sliceStart == null ? "" : sliceStart)); range.setFinish(comparator.fromString(sliceEnd == null ? "" : sliceEnd)); range.setReversed(reversed == null ? false : reversed.equals("true")); range.setCount(cassandraSplit.getSlicePredicateSize()); predicate.setSlice_range(range); } else { int iKey = columns.indexOf(CassandraColumnSerDe.CASSANDRA_KEY_COLUMN); predicate.setColumn_names(getColumnNames(iKey, columns, readColIDs)); } try { boolean wideRows = false; if (isTransposed && tac.getConfiguration() .getBoolean(CassandraColumnSerDe.CASSANDRA_ENABLE_WIDEROW_ITERATOR, true)) { wideRows = true; } ConfigHelper.setInputColumnFamily(tac.getConfiguration(), cassandraSplit.getKeyspace(), cassandraSplit.getColumnFamily(), wideRows); ConfigHelper.setInputSlicePredicate(tac.getConfiguration(), predicate); ConfigHelper.setRangeBatchSize(tac.getConfiguration(), cassandraSplit.getRangeBatchSize()); ConfigHelper.setInputRpcPort(tac.getConfiguration(), cassandraSplit.getPort() + ""); ConfigHelper.setInputInitialAddress(tac.getConfiguration(), cassandraSplit.getHost()); ConfigHelper.setInputPartitioner(tac.getConfiguration(), cassandraSplit.getPartitioner()); // Set Split Size ConfigHelper.setInputSplitSize(tac.getConfiguration(), cassandraSplit.getSplitSize()); LOG.info("Validators : " + tac.getConfiguration().get(CassandraColumnSerDe.CASSANDRA_VALIDATOR_TYPE)); List<IndexExpression> indexExpr = parseFilterPredicate(jobConf); if (indexExpr != null) { //We have pushed down a filter from the Hive query, we can use this against secondary indexes ConfigHelper.setInputRange(tac.getConfiguration(), indexExpr); } CassandraHiveRecordReader rr = new CassandraHiveRecordReader(new ColumnFamilyRecordReader(), isTransposed); rr.initialize(cfSplit, tac); return rr; } catch (Exception ie) { throw new IOException(ie); } }