bulkload.DVDsLoad.java Source code

Java tutorial

Introduction

Here is the source code for bulkload.DVDsLoad.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package bulkload;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.text.SimpleDateFormat;
import java.util.List;

import org.supercsv.io.CsvListReader;
import org.supercsv.prefs.CsvPreference;

import org.apache.cassandra.config.Config;
import org.apache.cassandra.dht.Murmur3Partitioner;
import org.apache.cassandra.exceptions.InvalidRequestException;
import org.apache.cassandra.io.sstable.CQLSSTableWriter;

/**
 * Usage: java bulkload.DVDsLoad
 */
public class DVDsLoad {
    public static final String CSV_URL = "file:///Users/pkovacs/Downloads/dvd_csv/dvd_csv.txt";

    /** Default output directory */
    public static final String DEFAULT_OUTPUT_DIR = "./data";

    public static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd");

    /** Keyspace name */
    public static final String KEYSPACE = "mykeyspace";
    /** Table name */
    public static final String TABLE = "dvds";

    /**
     * Schema for bulk loading table.
     * It is important not to forget adding keyspace name before table name,
     * otherwise CQLSSTableWriter throws exception.
     */
    public static final String SCHEMA = String.format("CREATE TABLE %s.%s (" + "title text, " + "studio text, "
            + "released text, " + "status text, " + "sound text, " + "versions text, " + "price text, "
            + "rating text, " + "year text, " + "genre text, " + "aspect text, " + "upc_id text, "
            + "release_date text, " + "id int, " + "timestamp text, " + "PRIMARY KEY (id)" + ");", KEYSPACE, TABLE);

    /**
     * INSERT statement to bulk load.
     * It is like prepared statement. You fill in place holder for each data.
     */
    public static final String INSERT_STMT = String.format(
            "INSERT INTO %s.%s (" + "title, studio, released, status, sound, versions, price, rating, year, genre, "
                    + "aspect, upc_id, release_date, id, timestamp" + ") VALUES ("
                    + "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?" + ")",
            KEYSPACE, TABLE);

    public static void main(String[] args) {
        if (args.length == 0) {
            System.out.println("usage: java bulkload.DVDsLoad <list of ticker symbols>");
            return;
        }

        // magic!
        Config.setClientMode(true);

        // Create output directory that has keyspace and table name in the path
        File outputDir = new File(DEFAULT_OUTPUT_DIR + File.separator + KEYSPACE + File.separator + TABLE);
        if (!outputDir.exists() && !outputDir.mkdirs()) {
            throw new RuntimeException("Cannot create output directory: " + outputDir);
        }

        // Prepare SSTable writer
        CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder();
        // set output directory
        builder.inDirectory(outputDir)
                // set target schema
                .forTable(SCHEMA)
                // set CQL statement to put data
                .using(INSERT_STMT)
                // set partitioner if needed
                // default is Murmur3Partitioner so set if you use different one.
                .withPartitioner(new Murmur3Partitioner());
        CQLSSTableWriter writer = builder.build();

        for (String ticker : args) {
            URLConnection conn;
            try {
                URL url = new URL(String.format(CSV_URL, ticker));
                conn = url.openConnection();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }

            try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
                    CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) {
                csvReader.getHeader(true);

                // Write to SSTable while reading data
                List<String> line;
                while ((line = csvReader.read()) != null) {
                    // We use Java types here based on
                    // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29
                    writer.addRow(line.get(0) == null ? null : new String(line.get(0)),
                            line.get(1) == null ? null : new String(line.get(1)),
                            line.get(2) == null ? null : new String(line.get(2)),
                            line.get(3) == null ? null : new String(line.get(3)),
                            line.get(4) == null ? null : new String(line.get(4)),
                            line.get(5) == null ? null : new String(line.get(5)),
                            line.get(6) == null ? null : new String(line.get(6)),
                            line.get(7) == null ? null : new String(line.get(7)),
                            line.get(8) == null ? null : new String(line.get(8)),
                            line.get(9) == null ? null : new String(line.get(9)),
                            line.get(10) == null ? null : new String(line.get(10)),
                            line.get(11) == null ? null : new String(line.get(11)),
                            line.get(12) == null ? "0001-01-01" : new String(line.get(12)),
                            line.get(13) == null ? null : new Integer(line.get(13)),
                            line.get(14) == null ? null : new String(line.get(14)));
                }
            } catch (InvalidRequestException | IOException e) {
                e.printStackTrace();
            }
        }

        try {
            writer.close();
        } catch (IOException ignore) {
        }
    }
}