Java tutorial
/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Inspired from https://github.com/yukim/cassandra-bulkload-example/ */ package com.criteo.biggraphite; import org.apache.cassandra.config.Config; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.io.sstable.CQLSSTableWriter; import org.supercsv.io.CsvListReader; import org.supercsv.prefs.CsvPreference; import java.io.*; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; import java.util.List; import java.util.UUID; /** * Usage: java biggraphite.BgGenerateCassandraSSTables */ public class BgGenerateCassandraSSTables { /** Default output directory */ private static final String DEFAULT_OUTPUT_DIR = "./data"; /** * INSERT statement to bulk load. * It is like prepared statement. You fill in place holder for each data. */ private static final String INSERT_STMT = "INSERT INTO %s.%s " + "(metric, time_start_ms, offset, value, count)" + " VALUES (?, ?, ?, ?, ?);"; /** * Returns a new double initialized to the value represented by the specified String, as performed by the valueOf method of class Double. * * Compare to the builtin Double.parseDouble, it accepts "-inf" and "inf" as synonym for "-Infinity" and "Infinity" * ("-inf" and "inf" can be generated by biggraphite.cli.import_whisper). * @param s the string to be parsed. * @return the double value represented by the string argument. * @throws NullPointerException if the string is null * @throws NumberFormatException if the string does not contain a parsable double. */ private static double parseDouble(String s) throws NumberFormatException { switch (s) { case "-inf": return Double.NEGATIVE_INFINITY; case "+inf": case "inf": return Double.POSITIVE_INFINITY; default: return Double.parseDouble(s); } } /** * Utility to write Cassandra SSTables. * * @param args <KEYSPACE> <TABLE> <CQL> <CSV> * @throws IOException if an I/O error occurs reading from the stream */ public static void main(String[] args) throws IOException { if (args.length != 4) { System.out .println("usage: java biggraphite.BgGenerateCassandraSSTables <KEYSPACE> <TABLE> <CQL> <CSV>"); return; } final String keyspace = args[0]; final String table = args[1]; final String schema = new String(Files.readAllBytes(Paths.get(args[2])), StandardCharsets.UTF_8); final String data = args[3]; final String insert_stmt = String.format(INSERT_STMT, keyspace, table); // magic! Config.setClientMode(true); // Create output directory that has keyspace and table name in the path final File outputDir = Paths.get(DEFAULT_OUTPUT_DIR, keyspace, table).toFile(); if (!outputDir.exists() && !outputDir.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir); } // Prepare SSTable writer final CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder().inDirectory(outputDir) // the directory where to write the sstables .forTable(schema) // the schema (CREATE TABLE statement) for the table for which sstable are to be created .using(insert_stmt) // the INSERT statement defining the order of the values to add for a given CQL row .withPartitioner(new Murmur3Partitioner()); try (CQLSSTableWriter writer = builder.build(); BufferedReader reader = new BufferedReader(new FileReader(data)); CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) { // import_whisper don't generate any header, so we should NOT skip the first line //csvReader.getHeader(true); // Write to SSTable while reading data List<String> line; while ((line = csvReader.read()) != null) { // We use Java types here based on // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29 writer.addRow(UUID.fromString(line.get(0)), // metric uuid Long.parseLong(line.get(1)), // time_start_ms Short.parseShort(line.get(2)), // offset parseDouble(line.get(3)), // value Integer.parseInt(line.get(4))); // count } } catch (IOException e) { e.printStackTrace(); } } }