List of usage examples for org.apache.cassandra.hadoop ConfigHelper setInputColumnFamily
public static void setInputColumnFamily(Configuration conf, String keyspace, String columnFamily, boolean widerows)
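Before the per-project examples below, here is a minimal, self-contained sketch of the call. The keyspace, column family, and connection values are illustrative placeholders, not values taken from the examples. Note that ConfigHelper also exposes an overload without the widerows flag; as the comments in the JanusGraph examples below point out, that overload forces widerows to false.

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.hadoop.conf.Configuration;

public class SetInputColumnFamilyExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Placeholder keyspace and column family names, for illustration only
        ConfigHelper.setInputColumnFamily(conf, "my_keyspace", "my_column_family", true); // widerows enabled

        // A usable input configuration also needs connection and partitioner
        // settings, as the fuller examples below demonstrate
        ConfigHelper.setInputInitialAddress(conf, "127.0.0.1");
        ConfigHelper.setInputRpcPort(conf, "9160");
        ConfigHelper.setInputPartitioner(conf, "org.apache.cassandra.dht.Murmur3Partitioner");
    }
}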
From source file: grakn.core.server.session.reader.GraknBinaryInputFormat.java
License: Open Source License
@Override
public void setConf(final Configuration config) {
    super.setConf(config);

    // Copy some JanusGraph configuration keys to the Hadoop Configuration keys
    // used by Cassandra's ColumnFamilyInputFormat
    ConfigHelper.setInputInitialAddress(config, janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
    if (janusgraphConf.has(GraphDatabaseConfiguration.STORAGE_PORT)) {
        ConfigHelper.setInputRpcPort(config,
                String.valueOf(janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
    }
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_USERNAME)) {
        ConfigHelper.setInputKeyspaceUserName(config, janusgraphConf.get(GraphDatabaseConfiguration.AUTH_USERNAME));
    }
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD)) {
        ConfigHelper.setInputKeyspacePassword(config, janusgraphConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD));
    }

    // Copy the keyspace, force the CF setting to edgestore, and honor widerows when set.
    final boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    // Use the setInputColumnFamily overload that takes a widerows argument;
    // the overload without it forces widerows to false.
    ConfigHelper.setInputColumnFamily(config, janusgraphConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE),
            mrConf.get(JanusGraphHadoopConfiguration.COLUMN_FAMILY_NAME), wideRows);
    log.debug("Set keyspace: {}", janusgraphConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE));

    // Set the column slice bounds via Faunus' vertex query filter
    final SlicePredicate predicate = new SlicePredicate();
    final int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE);
    predicate.setSlice_range(getSliceRange(rangeBatchSize)); // TODO stop slicing the whole row
    ConfigHelper.setInputSlicePredicate(config, predicate);
}
From source file: org.apache.hadoop.hive.cassandra.input.cql.HiveCqlInputFormat.java
License: Apache License
@Override
public RecordReader<MapWritableComparable, MapWritable> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {
    HiveCassandraStandardSplit cassandraSplit = (HiveCassandraStandardSplit) split;

    List<String> columns = CqlSerDe.parseColumnMapping(cassandraSplit.getColumnMapping());
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (columns.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    ColumnFamilySplit cfSplit = cassandraSplit.getSplit();
    Job job = new Job(jobConf);

    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    SlicePredicate predicate = new SlicePredicate();
    predicate.setColumn_names(getColumnNames(columns, readColIDs));

    try {
        boolean wideRows = true;
        ConfigHelper.setInputColumnFamily(tac.getConfiguration(), cassandraSplit.getKeyspace(),
                cassandraSplit.getColumnFamily(), wideRows);
        ConfigHelper.setInputSlicePredicate(tac.getConfiguration(), predicate);
        ConfigHelper.setRangeBatchSize(tac.getConfiguration(), cassandraSplit.getRangeBatchSize());
        ConfigHelper.setInputRpcPort(tac.getConfiguration(), cassandraSplit.getPort() + "");
        ConfigHelper.setInputInitialAddress(tac.getConfiguration(), cassandraSplit.getHost());
        ConfigHelper.setInputPartitioner(tac.getConfiguration(), cassandraSplit.getPartitioner());
        // Set split size
        ConfigHelper.setInputSplitSize(tac.getConfiguration(), cassandraSplit.getSplitSize());

        LOG.info("Validators : " + tac.getConfiguration().get(CassandraColumnSerDe.CASSANDRA_VALIDATOR_TYPE));

        List<IndexExpression> indexExpr = parseFilterPredicate(jobConf);
        if (indexExpr != null) {
            // We have pushed down a filter from the Hive query; we can use it against secondary indexes
            ConfigHelper.setInputRange(tac.getConfiguration(), indexExpr);
        }

        CqlHiveRecordReader rr = new CqlHiveRecordReader(new CqlPagingRecordReader());
        rr.initialize(cfSplit, tac);
        return rr;
    } catch (Exception ie) {
        throw new IOException(ie);
    }
}
From source file: org.apache.hadoop.hive.cassandra.input.HiveCassandraStandardColumnInputFormat.java
License: Apache License
@Override
public RecordReader<BytesWritable, MapWritable> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {
    HiveCassandraStandardSplit cassandraSplit = (HiveCassandraStandardSplit) split;

    List<String> columns = CassandraColumnSerDe.parseColumnMapping(cassandraSplit.getColumnMapping());
    isTransposed = CassandraColumnSerDe.isTransposed(columns);
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (columns.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    org.apache.cassandra.hadoop.ColumnFamilySplit cfSplit = cassandraSplit.getSplit();
    Job job = new Job(jobConf);

    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    SlicePredicate predicate = new SlicePredicate();

    if (isTransposed || readColIDs.size() == columns.size() || readColIDs.size() == 0) {
        SliceRange range = new SliceRange();
        AbstractType comparator = BytesType.instance;

        String comparatorType = jobConf.get(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR);
        if (comparatorType != null && !comparatorType.equals("")) {
            try {
                comparator = TypeParser.parse(comparatorType);
            } catch (ConfigurationException ex) {
                throw new IOException("Comparator class not found.");
            } catch (SyntaxException e) {
                throw new IOException(e);
            }
        }

        String sliceStart = jobConf.get(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START);
        String sliceEnd = jobConf.get(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH);
        String reversed = jobConf.get(AbstractCassandraSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED);

        range.setStart(comparator.fromString(sliceStart == null ? "" : sliceStart));
        range.setFinish(comparator.fromString(sliceEnd == null ? "" : sliceEnd));
        range.setReversed(reversed == null ? false : reversed.equals("true"));
        range.setCount(cassandraSplit.getSlicePredicateSize());
        predicate.setSlice_range(range);
    } else {
        int iKey = columns.indexOf(CassandraColumnSerDe.CASSANDRA_KEY_COLUMN);
        predicate.setColumn_names(getColumnNames(iKey, columns, readColIDs));
    }

    try {
        boolean wideRows = false;
        if (isTransposed && tac.getConfiguration()
                .getBoolean(CassandraColumnSerDe.CASSANDRA_ENABLE_WIDEROW_ITERATOR, true)) {
            wideRows = true;
        }

        ConfigHelper.setInputColumnFamily(tac.getConfiguration(), cassandraSplit.getKeyspace(),
                cassandraSplit.getColumnFamily(), wideRows);
        ConfigHelper.setInputSlicePredicate(tac.getConfiguration(), predicate);
        ConfigHelper.setRangeBatchSize(tac.getConfiguration(), cassandraSplit.getRangeBatchSize());
        ConfigHelper.setInputRpcPort(tac.getConfiguration(), cassandraSplit.getPort() + "");
        ConfigHelper.setInputInitialAddress(tac.getConfiguration(), cassandraSplit.getHost());
        ConfigHelper.setInputPartitioner(tac.getConfiguration(), cassandraSplit.getPartitioner());
        // Set split size
        ConfigHelper.setInputSplitSize(tac.getConfiguration(), cassandraSplit.getSplitSize());

        LOG.info("Validators : " + tac.getConfiguration().get(CassandraColumnSerDe.CASSANDRA_VALIDATOR_TYPE));

        List<IndexExpression> indexExpr = parseFilterPredicate(jobConf);
        if (indexExpr != null) {
            // We have pushed down a filter from the Hive query; we can use it against secondary indexes
            ConfigHelper.setInputRange(tac.getConfiguration(), indexExpr);
        }

        CassandraHiveRecordReader rr = new CassandraHiveRecordReader(new ColumnFamilyRecordReader(), isTransposed);
        rr.initialize(cfSplit, tac);
        return rr;
    } catch (Exception ie) {
        throw new IOException(ie);
    }
}
From source file: org.janusgraph.hadoop.formats.cassandra.CassandraBinaryInputFormat.java
License: Apache License
@Override
public void setConf(final Configuration config) {
    super.setConf(config);

    // Copy some JanusGraph configuration keys to the Hadoop Configuration keys
    // used by Cassandra's ColumnFamilyInputFormat
    ConfigHelper.setInputInitialAddress(config, janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
    if (janusgraphConf.has(GraphDatabaseConfiguration.STORAGE_PORT))
        ConfigHelper.setInputRpcPort(config,
                String.valueOf(janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_USERNAME))
        ConfigHelper.setInputKeyspaceUserName(config, janusgraphConf.get(GraphDatabaseConfiguration.AUTH_USERNAME));
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD))
        ConfigHelper.setInputKeyspacePassword(config, janusgraphConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD));

    // Copy the keyspace, force the CF setting to edgestore, and honor widerows when set.
    final boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    // Use the setInputColumnFamily overload that takes a widerows argument;
    // the overload without it forces widerows to false.
    ConfigHelper.setInputColumnFamily(config, janusgraphConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE),
            mrConf.get(JanusGraphHadoopConfiguration.COLUMN_FAMILY_NAME), wideRows);
    log.debug("Set keyspace: {}", janusgraphConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE));

    // Set the column slice bounds via Faunus's vertex query filter
    final SlicePredicate predicate = new SlicePredicate();
    final int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE);
    predicate.setSlice_range(
            getSliceRange(JanusGraphHadoopSetupCommon.DEFAULT_SLICE_QUERY, rangeBatchSize)); // TODO stop slicing the whole row
    ConfigHelper.setInputSlicePredicate(config, predicate);
}
From source file: org.janusgraph.hadoop.formats.cql.CqlBinaryInputFormat.java
License: Apache License
@Override
public void setConf(final Configuration config) {
    super.setConf(config);

    // Copy some JanusGraph configuration keys to the Hadoop Configuration keys
    // used by Cassandra's ColumnFamilyInputFormat
    ConfigHelper.setInputInitialAddress(config, janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
    if (janusgraphConf.has(GraphDatabaseConfiguration.STORAGE_PORT))
        ConfigHelper.setInputRpcPort(config,
                String.valueOf(janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_USERNAME))
        ConfigHelper.setInputKeyspaceUserName(config, janusgraphConf.get(GraphDatabaseConfiguration.AUTH_USERNAME));
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD))
        ConfigHelper.setInputKeyspacePassword(config, janusgraphConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD));

    // Copy the keyspace, force the CF setting to edgestore, and honor widerows when set.
    final boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    // Use the setInputColumnFamily overload that takes a widerows argument;
    // the overload without it forces widerows to false.
    ConfigHelper.setInputColumnFamily(config, janusgraphConf.get(CQLConfigOptions.KEYSPACE),
            mrConf.get(JanusGraphHadoopConfiguration.COLUMN_FAMILY_NAME), wideRows);
    log.debug("Set keyspace: {}", janusgraphConf.get(CQLConfigOptions.KEYSPACE));

    // Set the column slice bounds via Faunus' vertex query filter
    final SlicePredicate predicate = new SlicePredicate();
    final int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE);
    predicate.setSlice_range(getSliceRange(rangeBatchSize)); // TODO stop slicing the whole row
    ConfigHelper.setInputSlicePredicate(config, predicate);
}