List of usage examples for org.apache.cassandra.hadoop ConfigHelper setOutputColumnFamily
public static void setOutputColumnFamily(Configuration conf, String keyspace, String columnFamily)
From source file:net.orpiske.tcs.wc.main.Main.java
License:Apache License
/** * Setup the output to dump the M/R result to word_cloud table on * Cassandra/*from w ww . ja v a2s . com*/ * @param configuration */ private void outputConfiguration(Configuration configuration) { ConfigHelper.setOutputInitialAddress(configuration, DB_HOST); ConfigHelper.setOutputColumnFamily(configuration, KEYSPACE, OUTPUT_TABLE); ConfigHelper.setOutputPartitioner(configuration, PARTITIONER); String query = "UPDATE " + KEYSPACE + "." + OUTPUT_TABLE + " SET hash = ?, domain = ?, word = ?, occurrences = ?, reference_date = ? "; CqlConfigHelper.setOutputCql(configuration, query); }
From source file:org.wikimedia.analytics.refinery.cassandra.CassandraXSVLoader.java
License:Apache License
/** * Runs the map-reduce job/* w w w . j a va2s . c o m*/ */ public int run(String[] args) throws Exception { // Configuration from Tool Configuration conf = getConf(); // Checking configuration parameters if (!checkConfParameters()) { logger.error("Problem with configuration, aborting."); System.exit(1); } //Build cql query String cqlQuery = makeCqlQuery(); logger.info("CQL Query to be run: " + cqlQuery); // Parameters ok -> job configuration and launch Job job = new Job(conf, "CassandraXSVLoader"); job.setJarByClass(CassandraXSVLoader.class); // Identity Mapper - Nothing to get done at map time job.setMapperClass(Mapper.class); FileInputFormat.addInputPath(job, new Path(conf.get(INPUT_PATH_PROP))); // reducer to cassandra job.setReducerClass(ReducerToCassandra.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Map.class); job.setOutputValueClass(List.class); // Use cassandra cql output format // This is where the actual connection and data push // to cassandra is made job.setOutputFormatClass(CqlOutputFormat.class); ConfigHelper.setOutputColumnFamily(job.getConfiguration(), quoteIdentifier(conf.get(OUTPUT_KEYSPACE_PROP)), quoteIdentifier(conf.get(OUTPUT_COLUMN_FAMILY_PROP))); CqlConfigHelper.setOutputCql(job.getConfiguration(), cqlQuery); ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner"); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), conf.get(CASSANDRA_HOST_PROP)); CqlConfigHelper.setUserNameAndPassword(job.getConfiguration(), conf.get(CASSANDRA_USER_PROP), conf.get(CASSANDRA_PASSWD_PROP)); // If batch size parameters are set, use them if ((conf.getInt(CASSANDRA_NODES, 0) > 0) && (conf.getInt(BATCH_SIZE_PROP, 0) > 0)) { conf.setInt(ColumnFamilyOutputFormat.BATCH_THRESHOLD, conf.getInt(BATCH_SIZE_PROP, 0)); conf.setInt(ColumnFamilyOutputFormat.QUEUE_SIZE, conf.getInt(BATCH_SIZE_PROP, 0) * conf.getInt(CASSANDRA_NODES, 0) + 1); } job.waitForCompletion(true); return 0; }