Example usage for org.apache.cassandra.hadoop ConfigHelper setOutputColumnFamily

Introduction

This page shows example usages of org.apache.cassandra.hadoop.ConfigHelper.setOutputColumnFamily.

Prototype

public static void setOutputColumnFamily(Configuration conf, String keyspace, String columnFamily)

Source Link

Document

Set the column family for the output of this job.

Usage

From source file:net.orpiske.tcs.wc.main.Main.java

License:Apache License

/**
 * Setup the output to dump the M/R result to word_cloud table on
 * Cassandra/*from w ww  . ja  v  a2s .  com*/
 * @param configuration
 */
private void outputConfiguration(Configuration configuration) {
    ConfigHelper.setOutputInitialAddress(configuration, DB_HOST);
    ConfigHelper.setOutputColumnFamily(configuration, KEYSPACE, OUTPUT_TABLE);
    ConfigHelper.setOutputPartitioner(configuration, PARTITIONER);

    String query = "UPDATE " + KEYSPACE + "." + OUTPUT_TABLE
            + " SET hash = ?, domain = ?, word = ?, occurrences = ?, reference_date = ? ";
    CqlConfigHelper.setOutputCql(configuration, query);
}

From source file:org.wikimedia.analytics.refinery.cassandra.CassandraXSVLoader.java

License:Apache License

/**
 * Runs the map-reduce job/* w w  w  .  j a va2s  . c  o  m*/
 */
public int run(String[] args) throws Exception {

    // Configuration from Tool
    Configuration conf = getConf();

    // Checking configuration parameters
    if (!checkConfParameters()) {
        logger.error("Problem with configuration, aborting.");
        System.exit(1);
    }

    //Build cql query
    String cqlQuery = makeCqlQuery();
    logger.info("CQL Query to be run: " + cqlQuery);

    // Parameters ok -> job configuration and launch
    Job job = new Job(conf, "CassandraXSVLoader");
    job.setJarByClass(CassandraXSVLoader.class);

    // Identity Mapper - Nothing to get done at map time
    job.setMapperClass(Mapper.class);
    FileInputFormat.addInputPath(job, new Path(conf.get(INPUT_PATH_PROP)));

    // reducer to cassandra
    job.setReducerClass(ReducerToCassandra.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Map.class);
    job.setOutputValueClass(List.class);

    // Use cassandra cql output format
    // This is where the actual connection and data push
    // to cassandra is made
    job.setOutputFormatClass(CqlOutputFormat.class);

    ConfigHelper.setOutputColumnFamily(job.getConfiguration(), quoteIdentifier(conf.get(OUTPUT_KEYSPACE_PROP)),
            quoteIdentifier(conf.get(OUTPUT_COLUMN_FAMILY_PROP)));

    CqlConfigHelper.setOutputCql(job.getConfiguration(), cqlQuery);
    ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner");

    ConfigHelper.setOutputInitialAddress(job.getConfiguration(), conf.get(CASSANDRA_HOST_PROP));

    CqlConfigHelper.setUserNameAndPassword(job.getConfiguration(), conf.get(CASSANDRA_USER_PROP),
            conf.get(CASSANDRA_PASSWD_PROP));

    // If batch size parameters are set, use them
    if ((conf.getInt(CASSANDRA_NODES, 0) > 0) && (conf.getInt(BATCH_SIZE_PROP, 0) > 0)) {
        conf.setInt(ColumnFamilyOutputFormat.BATCH_THRESHOLD, conf.getInt(BATCH_SIZE_PROP, 0));
        conf.setInt(ColumnFamilyOutputFormat.QUEUE_SIZE,
                conf.getInt(BATCH_SIZE_PROP, 0) * conf.getInt(CASSANDRA_NODES, 0) + 1);
    }

    job.waitForCompletion(true);
    return 0;
}