com.criteo.biggraphite.BgGenerateCassandraSSTables.java Source code

Java tutorial

Introduction

Here is the source code for com.criteo.biggraphite.BgGenerateCassandraSSTables.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Inspired from https://github.com/yukim/cassandra-bulkload-example/
 */
package com.criteo.biggraphite;

import org.apache.cassandra.config.Config;
import org.apache.cassandra.dht.Murmur3Partitioner;
import org.apache.cassandra.io.sstable.CQLSSTableWriter;
import org.supercsv.io.CsvListReader;
import org.supercsv.prefs.CsvPreference;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.UUID;

/**
 * Command-line utility that converts a CSV file of data points into Cassandra SSTables
 * suitable for bulk loading.
 *
 * Usage: {@code java biggraphite.BgGenerateCassandraSSTables <KEYSPACE> <TABLE> <CQL> <CSV>}
 */
public class BgGenerateCassandraSSTables {
    /** Default output directory; SSTables are written below it in {@code <keyspace>/<table>}. */
    private static final String DEFAULT_OUTPUT_DIR = "./data";

    /**
     * INSERT statement template used for bulk loading.
     * The two {@code %s} are replaced with the keyspace and the table name; the {@code ?}
     * placeholders are bound for every row, like in a prepared statement.
     */
    private static final String INSERT_STMT = "INSERT INTO %s.%s " + "(metric, time_start_ms, offset, value, count)"
            + " VALUES (?, ?, ?, ?, ?);";

    /**
     * Returns a new double initialized to the value represented by the specified String.
     *
     * Compared to the builtin {@link Double#parseDouble(String)}, it also accepts "-inf" as a
     * synonym for "-Infinity", and "inf" / "+inf" as synonyms for "Infinity"
     * ("-inf" and "inf" can be generated by biggraphite.cli.import_whisper).
     * Every other input is delegated to {@link Double#parseDouble(String)}.
     *
     * @param s the string to be parsed.
     * @return the double value represented by the string argument.
     * @throws NullPointerException if the string is null
     * @throws NumberFormatException if the string does not contain a parsable double.
     */
    private static double parseDouble(String s) throws NumberFormatException {
        switch (s) {
        case "-inf":
            return Double.NEGATIVE_INFINITY;
        case "+inf":
        case "inf":
            return Double.POSITIVE_INFINITY;
        default:
            return Double.parseDouble(s);
        }
    }

    /**
     * Utility to write Cassandra SSTables.
     *
     * Reads the CREATE TABLE statement from the CQL file, then reads the CSV file row by row
     * and adds each row to an SSTable writer targeting
     * {@code ./data/<KEYSPACE>/<TABLE>}. Each CSV row must hold, in order:
     * metric UUID, time_start_ms, offset, value, count.
     *
     * If the number of arguments is not exactly four, a usage message is printed and the
     * method returns without doing anything else.
     *
     * @param args {@code <KEYSPACE> <TABLE> <CQL> <CSV>}
     * @throws IOException if an I/O error occurs while reading the CQL or CSV file, or while
     *         writing the SSTables. The error is propagated (instead of being only printed)
     *         so that the process terminates with a failure status.
     */
    public static void main(String[] args) throws IOException {

        if (args.length != 4) {
            System.out
                    .println("usage: java biggraphite.BgGenerateCassandraSSTables <KEYSPACE> <TABLE> <CQL> <CSV>");
            return;
        }
        final String keyspace = args[0];
        final String table = args[1];
        final String schema = new String(Files.readAllBytes(Paths.get(args[2])), StandardCharsets.UTF_8);
        final String data = args[3];
        final String insertStatement = String.format(INSERT_STMT, keyspace, table);

        // NOTE(review): presumably tells the Cassandra libraries that they run outside of a
        // server node so that no server configuration is loaded; confirm against the
        // Cassandra version in use.
        Config.setClientMode(true);

        // Create output directory that has keyspace and table name in the path
        final File outputDir = Paths.get(DEFAULT_OUTPUT_DIR, keyspace, table).toFile();
        if (!outputDir.exists() && !outputDir.mkdirs()) {
            throw new RuntimeException("Cannot create output directory: " + outputDir);
        }

        // Prepare SSTable writer
        final CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder().inDirectory(outputDir) // the directory where to write the sstables
                .forTable(schema) // the schema (CREATE TABLE statement) for the table for which sstable are to be created
                .using(insertStatement) // the INSERT statement defining the order of the values to add for a given CQL row
                .withPartitioner(new Murmur3Partitioner());

        // The CSV is decoded explicitly as UTF-8 (like the schema file above) rather than with
        // the platform default charset, so that the result does not depend on the host locale.
        // Resources are closed in reverse order by try-with-resources; any IOException is left
        // to propagate to the caller.
        try (CQLSSTableWriter writer = builder.build();
                BufferedReader reader = Files.newBufferedReader(Paths.get(data), StandardCharsets.UTF_8);
                CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) {
            // import_whisper does not generate any header, so the first line must NOT be skipped.

            // Write to SSTable while reading data
            List<String> line;
            while ((line = csvReader.read()) != null) {
                // We use Java types here based on
                // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29
                writer.addRow(UUID.fromString(line.get(0)), // metric uuid
                        Long.parseLong(line.get(1)), // time_start_ms
                        Short.parseShort(line.get(2)), // offset
                        parseDouble(line.get(3)), // value
                        Integer.parseInt(line.get(4))); // count
            }
        }
    }
}