List of usage examples for org.apache.cassandra.hadoop.cql3.CqlConfigHelper#setOutputCql
public static void setOutputCql(Configuration conf, String cql)
From source file:com.dse.pig.udfs.CqlStorage.java
License:Apache License
/**
 * Applies the store-side settings to the configuration of the given job.
 *
 * <p>Optional settings (credentials, split size, partitioner, RPC port and
 * initial address) are copied into the configuration only when the
 * corresponding field is set. The output column family and output CQL are
 * always set. After {@code setConnectionInformation()} has run, the output
 * RPC port, initial address and partitioner must all be present.
 *
 * @param location store location, handed to {@code setLocationFromUri}
 * @param job      job whose configuration receives the settings
 * @throws IOException if the output RPC port, initial address or partitioner
 *                     is still missing after all settings have been applied
 */
public void setStoreLocation(String location, Job job) throws IOException {
    conf = job.getConfiguration();
    setLocationFromUri(location);

    if (username != null && password != null) {
        ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password);
    }
    if (splitSize > 0) {
        ConfigHelper.setInputSplitSize(conf, splitSize);
    }
    if (partitionerClass != null) {
        ConfigHelper.setOutputPartitioner(conf, partitionerClass);
    }
    if (rpcPort != null) {
        // The same port is applied to both the output and the input side.
        ConfigHelper.setOutputRpcPort(conf, rpcPort);
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    }
    if (initHostAddress != null) {
        // Likewise, one address serves both directions.
        ConfigHelper.setOutputInitialAddress(conf, initHostAddress);
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    }

    ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family);
    CqlConfigHelper.setOutputCql(conf, outputQuery);

    // NOTE(review): presumably fills in connection settings from the PIG_*
    // environment variables named in the messages below - confirm.
    setConnectionInformation();

    // Fail fast when a mandatory output setting is still unset.
    if (ConfigHelper.getOutputRpcPort(conf) == 0) {
        throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    }
    if (ConfigHelper.getOutputInitialAddress(conf) == null) {
        throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    }
    if (ConfigHelper.getOutputPartitioner(conf) == null) {
        throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");
    }

    initSchema(storeSignature);
}
From source file:net.orpiske.tcs.wc.main.Main.java
License:Apache License
/** * Setup the output to dump the M/R result to word_cloud table on * Cassandra/*from w ww.j a v a 2s . co m*/ * @param configuration */ private void outputConfiguration(Configuration configuration) { ConfigHelper.setOutputInitialAddress(configuration, DB_HOST); ConfigHelper.setOutputColumnFamily(configuration, KEYSPACE, OUTPUT_TABLE); ConfigHelper.setOutputPartitioner(configuration, PARTITIONER); String query = "UPDATE " + KEYSPACE + "." + OUTPUT_TABLE + " SET hash = ?, domain = ?, word = ?, occurrences = ?, reference_date = ? "; CqlConfigHelper.setOutputCql(configuration, query); }
From source file:org.wikimedia.analytics.refinery.cassandra.CassandraXSVLoader.java
License:Apache License
/**
 * Runs the map-reduce job that loads the input files into Cassandra.
 *
 * <p>The job uses the identity mapper and {@code ReducerToCassandra} as the
 * reducer, and writes through {@code CqlOutputFormat} with the statement
 * built by {@code makeCqlQuery()}.
 *
 * @param args command-line arguments (not read by this method)
 * @return 0 when the job completes successfully, 1 when it fails
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    // Configuration from Tool
    Configuration conf = getConf();

    // Checking configuration parameters
    if (!checkConfParameters()) {
        logger.error("Problem with configuration, aborting.");
        // NOTE(review): kept as in the original; returning a non-zero code
        // instead would leave the exit decision to the caller.
        System.exit(1);
    }

    // Build cql query
    String cqlQuery = makeCqlQuery();
    logger.info("CQL Query to be run: " + cqlQuery);

    // Parameters ok -> job configuration and launch
    Job job = new Job(conf, "CassandraXSVLoader");
    job.setJarByClass(CassandraXSVLoader.class);

    // Job works on its own copy of conf, so every setting made from here on
    // must go through the job's configuration to take effect.
    Configuration jobConf = job.getConfiguration();

    // Identity Mapper - nothing to get done at map time
    job.setMapperClass(Mapper.class);
    FileInputFormat.addInputPath(job, new Path(conf.get(INPUT_PATH_PROP)));

    // Reducer to cassandra
    job.setReducerClass(ReducerToCassandra.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Map.class);
    job.setOutputValueClass(List.class);

    // Use cassandra cql output format.
    // This is where the actual connection and data push to cassandra is made.
    job.setOutputFormatClass(CqlOutputFormat.class);
    ConfigHelper.setOutputColumnFamily(jobConf,
            quoteIdentifier(conf.get(OUTPUT_KEYSPACE_PROP)),
            quoteIdentifier(conf.get(OUTPUT_COLUMN_FAMILY_PROP)));
    CqlConfigHelper.setOutputCql(jobConf, cqlQuery);
    ConfigHelper.setOutputPartitioner(jobConf, "Murmur3Partitioner");
    ConfigHelper.setOutputInitialAddress(jobConf, conf.get(CASSANDRA_HOST_PROP));
    CqlConfigHelper.setUserNameAndPassword(jobConf,
            conf.get(CASSANDRA_USER_PROP),
            conf.get(CASSANDRA_PASSWD_PROP));

    // If batch size parameters are set, use them
    int cassandraNodes = conf.getInt(CASSANDRA_NODES, 0);
    int batchSize = conf.getInt(BATCH_SIZE_PROP, 0);
    if (cassandraNodes > 0 && batchSize > 0) {
        jobConf.setInt(ColumnFamilyOutputFormat.BATCH_THRESHOLD, batchSize);
        jobConf.setInt(ColumnFamilyOutputFormat.QUEUE_SIZE, batchSize * cassandraNodes + 1);
    }

    // Report job failure through the exit code instead of always returning 0.
    return job.waitForCompletion(true) ? 0 : 1;
}