Example usage of org.apache.cassandra.io.sstable.CQLSSTableWriter.builder()

Introduction

This page collects example usages of the builder() method of org.apache.cassandra.io.sstable.CQLSSTableWriter.

Prototype

public static Builder builder()

Source Link

Document

Returns a new builder for a CQLSSTableWriter.

Usage

From source file:biggraphite.BgGenerateCassandraSSTables.java

License:Apache License

/**
 * Converts a CSV file of points into SSTables for one Cassandra table.
 *
 * <p>The schema is read from the file named by the third argument, the output
 * directory is {@code DEFAULT_OUTPUT_DIR/<keyspace>/<table>}, and every CSV row
 * after the header line is written as one CQL row.
 *
 * @param args {@code <KEYSPACE> <TABLE> <CQL> <CSV>}
 * @throws IOException if the schema file cannot be read
 */
public static void main(String[] args) throws IOException {
    if (args.length != 4) {
        System.out
                .println("usage: java biggraphite.BgGenerateCassandraSSTables <KEYSPACE> <TABLE> <CQL> <CSV>");
        return;
    }
    final String keyspace = args[0];
    final String table = args[1];
    final String schema = new String(Files.readAllBytes(Paths.get(args[2])), StandardCharsets.UTF_8);
    final String data = args[3];
    final String insert_stmt = String.format(INSERT_STMT, keyspace, table);

    // magic!
    Config.setClientMode(true);

    // Create output directory that has keyspace and table name in the path
    File outputDir = new File(DEFAULT_OUTPUT_DIR + File.separator + keyspace + File.separator + table);
    if (!outputDir.exists() && !outputDir.mkdirs()) {
        throw new RuntimeException("Cannot create output directory: " + outputDir);
    }

    // Prepare SSTable writer
    CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder();
    // set output directory
    builder.inDirectory(outputDir).forTable(schema).using(insert_stmt)
            .withPartitioner(new Murmur3Partitioner());

    // The writer is a resource of this try block so that it is closed even when a row
    // fails to parse (e.g. NumberFormatException), and so that a failure while closing
    // is reported by the catch below instead of being silently dropped.
    try (CQLSSTableWriter writer = builder.build();
            BufferedReader reader = new BufferedReader(new FileReader(data));
            CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) {
        // Consume the first line as the CSV header
        csvReader.getHeader(true);

        // Write to SSTable while reading data
        List<String> line;
        while ((line = csvReader.read()) != null) {
            // We use Java types here based on
            // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29
            writer.addRow(UUID.fromString(line.get(0)), // metric uuid
                    Long.parseLong(line.get(1)), // time_start_ms
                    Short.parseShort(line.get(2)), // offset
                    Double.parseDouble(line.get(3)), // value
                    Integer.parseInt(line.get(4))); // count
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:bulkload.DVDsLoad.java

License:Apache License

/**
 * Downloads one CSV document per command-line argument and writes its rows as SSTables.
 *
 * <p>Each argument is substituted into {@code CSV_URL}; the first line of every
 * downloaded document is consumed as a header. A failure while reading one
 * document is printed and processing continues with the next argument.
 *
 * @param args the values to substitute into {@code CSV_URL}; at least one is required
 */
public static void main(String[] args) {
    if (args.length == 0) {
        System.out.println("usage: java bulkload.DVDsLoad <list of ticker symbols>");
        return;
    }

    // magic!
    Config.setClientMode(true);

    // Create output directory that has keyspace and table name in the path
    File outputDir = new File(DEFAULT_OUTPUT_DIR + File.separator + KEYSPACE + File.separator + TABLE);
    if (!outputDir.exists() && !outputDir.mkdirs()) {
        throw new RuntimeException("Cannot create output directory: " + outputDir);
    }

    // Prepare SSTable writer
    CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder();
    // set output directory
    builder.inDirectory(outputDir)
            // set target schema
            .forTable(SCHEMA)
            // set CQL statement to put data
            .using(INSERT_STMT)
            // set partitioner if needed
            // default is Murmur3Partitioner so set if you use different one.
            .withPartitioner(new Murmur3Partitioner());

    // try-with-resources guarantees the writer is closed even if an unchecked
    // exception (e.g. NumberFormatException on column 13) aborts the loop.
    try (CQLSSTableWriter writer = builder.build()) {
        for (String ticker : args) {
            URLConnection conn;
            try {
                URL url = new URL(String.format(CSV_URL, ticker));
                conn = url.openConnection();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }

            try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
                    CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) {
                csvReader.getHeader(true);

                // Write to SSTable while reading data
                List<String> line;
                while ((line = csvReader.read()) != null) {
                    // We use Java types here based on
                    // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29
                    // Strings are immutable, so the parsed values (including nulls) are passed through as-is.
                    writer.addRow(line.get(0),
                            line.get(1),
                            line.get(2),
                            line.get(3),
                            line.get(4),
                            line.get(5),
                            line.get(6),
                            line.get(7),
                            line.get(8),
                            line.get(9),
                            line.get(10),
                            line.get(11),
                            // column 12 falls back to a placeholder value when missing
                            line.get(12) == null ? "0001-01-01" : line.get(12),
                            line.get(13) == null ? null : Integer.valueOf(line.get(13)),
                            line.get(14));
                }
            } catch (InvalidRequestException | IOException e) {
                e.printStackTrace();
            }
        }
    } catch (IOException e) {
        // Only closing the writer can reach this handler; report it rather than hide it.
        e.printStackTrace();
    }
}

From source file:bulkload.UploadIndexes.java

License:Apache License

/**
 * Writes the inverted-index file given on the command line as SSTables.
 *
 * <p>Every input line is a comma-separated list whose first field is a
 * {@code long}: its upper 32 bits are used as the word id and its lower 32 bits
 * as the predicate. The remaining fields are parsed as integers, sorted, and
 * stored as the row's list value. Lines with a single field are skipped.
 *
 * @param args a single element: the path of the inverted-index file
 */
public static void main(String[] args) {
    if (args.length != 1) {
        System.out.println(
                "usage: java bulkload.UploadIndexes /path/to/inverted_indexes.txt (see bulkload.CreateInvertedIndices)");
        return;
    }

    String inverted_indexes_file = args[0];

    // magic!
    Config.setClientMode(true);

    // create inverted indexes
    {
        File outputDir2 = new File(Constants.KEYSPACE + File.separator + Constants.CF_INDEX);
        if (!outputDir2.exists() && !outputDir2.mkdirs()) {
            throw new RuntimeException("Cannot create output directory: " + outputDir2);
        }

        // Prepare SSTable writer
        CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder();
        // set output directory
        builder.inDirectory(outputDir2)
                // set target schema
                .forTable(SCHEMA_2)
                // set CQL statement to put data
                .using(INSERT_STMT_2)
                // set partitioner if needed
                // default is Murmur3Partitioner so set if you use different one.
                .withPartitioner(new Murmur3Partitioner());

        // The writer is closed by try-with-resources even when an unchecked exception
        // (e.g. NumberFormatException on a malformed line) aborts the read loop.
        try (CQLSSTableWriter writer = builder.build()) {
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(inverted_indexes_file)))) {

                int counter = 0;
                String line;
                while ((line = reader.readLine()) != null) {
                    String[] parts = line.split(",");

                    if (parts.length > 1) {

                        // The first field packs two ints: high half = word id, low half = predicate
                        long id = Long.parseLong(parts[0]);
                        int word_id = (int) (id >> 32);
                        int predicate = (int) id;

                        List<Integer> intList = new ArrayList<>();
                        for (int i = 1; i < parts.length; i++) {
                            intList.add(Integer.parseInt(parts[i]));
                        }

                        Collections.sort(intList);

                        if (!intList.isEmpty()) {
                            // A rejected row is reported and skipped so the rest of the file still loads
                            try {
                                writer.addRow(predicate, word_id, intList);
                            } catch (InvalidRequestException | IOException e) {
                                e.printStackTrace();
                            }
                        }
                        counter += 1;
                        // progress indicator
                        if ((counter % 1_000_000) == 0) {
                            System.out.println(counter);
                        }

                    }

                } // while loop

            } catch (InvalidRequestException | IOException e) {
                e.printStackTrace();
            }
        } catch (IOException e) {
            // Only closing the writer can reach this handler; report it rather than hide it.
            e.printStackTrace();
        }
    }

    System.out.println("done");
    String path = Constants.KEYSPACE + File.separator + Constants.CF_INDEX;
    System.out.println("you can upload these files to Cassandra: sstableloader -d host " + path);
    System.exit(0);

}

From source file:bulkload.UploadTuples.java

License:Apache License

/**
 * Writes {@code freebase_data.txt} from the given directory as SSTables.
 *
 * <p>Every input line is split on {@code |}; lines with exactly three fields
 * are written as one row (a running id, the left-hand side list, the predicate,
 * the right-hand side list). Other lines are skipped.
 *
 * @param args a single element: the directory containing {@code freebase_data.txt}
 */
public static void main(String[] args) {
    if (args.length != 1) {
        System.out.println("usage: java bulkload.UploadTuples /path/to/free_base/data/");
        return;
    }

    String Freebase_base = args[0];

    // magic!
    Config.setClientMode(true);

    // create main table
    {
        // Create output directory that has keyspace and table name in the path
        File outputDir1 = new File(Constants.KEYSPACE + File.separator + Constants.CF_TUPLE);
        if (!outputDir1.exists() && !outputDir1.mkdirs()) {
            throw new RuntimeException("Cannot create output directory: " + outputDir1);
        }

        // Prepare SSTable writer
        CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder();
        // set output directory
        builder.inDirectory(outputDir1)
                // set target schema
                .forTable(SCHEMA_1)
                // set CQL statement to put data
                .using(INSERT_STMT_1)
                // set partitioner if needed
                // default is Murmur3Partitioner so set if you use different one.
                .withPartitioner(new Murmur3Partitioner());

        // The writer is closed by try-with-resources even when an unchecked exception
        // (e.g. NumberFormatException on a malformed predicate) aborts the read loop.
        try (CQLSSTableWriter writer = builder.build()) {
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                    new FileInputStream(Freebase_base + File.separator + "freebase_data.txt")))) {

                // Write to SSTable while reading data
                int id_counter = 1;
                String line;
                while ((line = reader.readLine()) != null) {
                    String[] parts = line.split("\\|");
                    if (parts.length == 3) {
                        List<Integer> lhs = toIntList(parts[0]);
                        List<Integer> rhs = toIntList(parts[2]);
                        int predicate = Integer.parseInt(parts[1]);

                        writer.addRow(id_counter, lhs, predicate, rhs);

                        id_counter += 1;

                        // progress indicator
                        if ((id_counter % 10_000_000) == 0) {
                            System.out.println(id_counter);
                        }

                    }

                } // while loop

            } catch (InvalidRequestException | IOException e) {
                e.printStackTrace();
            }
        } catch (IOException e) {
            // Only closing the writer can reach this handler; report it rather than hide it.
            e.printStackTrace();
        }

    }

    System.out.println("done");
    String path = Constants.KEYSPACE + File.separator + Constants.CF_TUPLE;
    System.out.println("you can upload these files to Cassandra: sstableloader -d host " + path);
    System.exit(0);

}

From source file:bulkload.UploadVocab.java

License:Apache License

/**
 * Writes the vocabulary as two sets of SSTables: one keyed by word id and one keyed by word.
 *
 * <p>{@code freebase_predicate_vocab.txt} (lines of {@code predicate_str|predicate_id})
 * is read first; each vocabulary entry is then written to both tables together
 * with a flag telling whether its id also appears in that predicate file.
 *
 * @param args a single element: the directory containing the vocabulary files
 * @throws IOException if loading the vocabulary, writing a row, or closing a writer fails
 */
public static void main(String[] args) throws IOException {

    if (args.length != 1) {
        System.out.println("usage: java bulkload.UploadVocab /path/to/free_base/output/");
        return;
    }

    String Freebase_base = args[0];

    // magic!
    Config.setClientMode(true);

    // create main table
    {
        // Create output directory that has Constants.KEYSPACE and table name in the path
        File outputDir1 = new File(Constants.KEYSPACE + File.separator + Constants.CF_VOCAB);
        if (!outputDir1.exists() && !outputDir1.mkdirs()) {
            throw new RuntimeException("Cannot create output directory: " + outputDir1);
        }

        File outputDir2 = new File(Constants.KEYSPACE + File.separator + Constants.CF_WORD);
        if (!outputDir2.exists() && !outputDir2.mkdirs()) {
            throw new RuntimeException("Cannot create output directory: " + outputDir2);
        }

        // read the predicate vocab map
        Map<Integer, String> predicateMap = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(Freebase_base + File.separator + "freebase_predicate_vocab.txt")))) {
            String line;
            // predicate_str|predicate_id
            while ((line = reader.readLine()) != null) {
                String[] parts = line.split("\\|");
                if (parts.length == 2) {
                    String word = parts[0];
                    int word_id = Integer.parseInt(parts[1]);
                    predicateMap.put(word_id, word);
                }
            } // while loop

        } catch (InvalidRequestException | IOException e) {
            e.printStackTrace();
        }

        // Prepare SSTable writer
        CQLSSTableWriter.Builder builder_1 = CQLSSTableWriter.builder();
        // set output directory
        builder_1.inDirectory(outputDir1)
                // set target schema
                .forTable(SCHEMA_1)
                // set CQL statement to put data
                .using(INSERT_STMT_1)
                // set partitioner if needed
                // default is Murmur3Partitioner so set if you use different one.
                .withPartitioner(new Murmur3Partitioner());

        // Prepare SSTable writer
        CQLSSTableWriter.Builder builder_2 = CQLSSTableWriter.builder();
        // set output directory
        builder_2.inDirectory(outputDir2)
                // set target schema
                .forTable(SCHEMA_2)
                // set CQL statement to put data
                .using(INSERT_STMT_2)
                // set partitioner if needed
                // default is Murmur3Partitioner so set if you use different one.
                .withPartitioner(new Murmur3Partitioner());

        // Both writers are resources of this try block so they are closed even when
        // loading the vocabulary or adding a row throws. A failure while closing now
        // propagates (main declares IOException) instead of being silently ignored,
        // so "done" is no longer printed after an incomplete write.
        try (CQLSSTableWriter writer_1 = builder_1.build();
                CQLSSTableWriter writer_2 = builder_2.build()) {
            Map<Integer, String> vocabMap = loadVocab(Freebase_base);
            for (Map.Entry<Integer, String> entry : vocabMap.entrySet()) {
                int word_id = entry.getKey();
                String word = entry.getValue();
                // look the flag up once and reuse it for both tables
                boolean isPredicate = predicateMap.containsKey(word_id);
                writer_1.addRow(word_id, word, isPredicate);
                writer_2.addRow(word, word_id, isPredicate);
            }
        }

    }

    System.out.println("done");
    String path_1 = Constants.KEYSPACE + File.separator + Constants.CF_VOCAB;
    System.out.println("you can upload these files to Cassandra:\nsstableloader -d host " + path_1);

    String path_2 = Constants.KEYSPACE + File.separator + Constants.CF_WORD;
    System.out.println("sstableloader -d host " + path_2);

    System.exit(0);

}

From source file:com.criteo.biggraphite.BgGenerateCassandraSSTables.java

License:Apache License

/**
 * Command-line entry point that turns a header-less CSV file into Cassandra SSTables.
 *
 * @param args <KEYSPACE> <TABLE> <CQL> <CSV>
 * @throws IOException if an I/O error occurs reading from the stream
 */
public static void main(String[] args) throws IOException {

    final boolean hasExpectedArgs = args.length == 4;
    if (!hasExpectedArgs) {
        System.out
                .println("usage: java biggraphite.BgGenerateCassandraSSTables <KEYSPACE> <TABLE> <CQL> <CSV>");
        return;
    }
    final String keyspaceArg = args[0];
    final String tableArg = args[1];
    final byte[] schemaBytes = Files.readAllBytes(Paths.get(args[2]));
    final String createTableCql = new String(schemaBytes, StandardCharsets.UTF_8);
    final String csvPath = args[3];
    final String insertCql = String.format(INSERT_STMT, keyspaceArg, tableArg);

    // magic!
    Config.setClientMode(true);

    // The output directory carries the keyspace and table name in its path
    final File targetDir = Paths.get(DEFAULT_OUTPUT_DIR, keyspaceArg, tableArg).toFile();
    final boolean targetDirReady = targetDir.exists() || targetDir.mkdirs();
    if (!targetDirReady) {
        throw new RuntimeException("Cannot create output directory: " + targetDir);
    }

    // Configure the SSTable writer step by step
    final CQLSSTableWriter.Builder sstableBuilder = CQLSSTableWriter.builder();
    // the directory where to write the sstables
    sstableBuilder.inDirectory(targetDir);
    // the schema (CREATE TABLE statement) for the table for which sstable are to be created
    sstableBuilder.forTable(createTableCql);
    // the INSERT statement defining the order of the values to add for a given CQL row
    sstableBuilder.using(insertCql);
    sstableBuilder.withPartitioner(new Murmur3Partitioner());

    try (CQLSSTableWriter sstableWriter = sstableBuilder.build();
            BufferedReader fileReader = new BufferedReader(new FileReader(csvPath));
            CsvListReader csvRows = new CsvListReader(fileReader, CsvPreference.STANDARD_PREFERENCE)) {
        // import_whisper don't generate any header, so the first line is data and is NOT skipped

        // Stream rows from the CSV straight into the SSTable
        for (List<String> row = csvRows.read(); row != null; row = csvRows.read()) {
            // Java types are chosen according to
            // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29
            final UUID metricUuid = UUID.fromString(row.get(0));
            final long timeStartMs = Long.parseLong(row.get(1));
            final short offset = Short.parseShort(row.get(2));
            sstableWriter.addRow(metricUuid, timeStartMs, offset,
                    parseDouble(row.get(3)), // value
                    Integer.parseInt(row.get(4))); // count
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:com.datastax.bulkloader.CQL3BulkLoadUsers.java

public CQL3BulkLoadUsers() throws IOException {

    logger.info("Using CQL3 Writer");

    createDirectories(keyspace, tableName);

    this.writer = CQLSSTableWriter.builder().forTable(schema).using(INSERT_INTO_USER_TABLE)
            .inDirectory(getFilePath().getAbsolutePath()).build();
}

From source file:com.knewton.mapreduce.cassandra.WriteSampleSSTable.java

License:Apache License

/**
 * Writes a sample SSTable that can be used for running the example job {@link SSTableMRExample}.
 *
 * <p>For each of {@code numberOfStudents} students, {@code eventsPerStudent} random
 * events are generated and written as rows of (student id, event id, serialized data).
 *
 * @param args
 *            Args to be parsed
 * @throws Exception
 *             propagated from argument handling, event generation or the SSTable writer
 */
public static void main(String[] args) throws Exception {
    buildParametersFromArgs(args);

    IPartitioner partitioner = StorageService.getPartitioner();
    String schema = String.format(
            "CREATE TABLE %s.%s (studentid 'LongType', " + "eventid 'LongType'," + "data 'BytesType', "
                    + "PRIMARY KEY (studentid, eventid))" + " WITH COMPACT STORAGE",
            KEYSPACE_NAME, COLUMN_FAMILY_NAME);

    String insertStatement = String.format("INSERT INTO %s.%s (studentid, eventid, data) " + "VALUES (?, ?, ?)",
            KEYSPACE_NAME, COLUMN_FAMILY_NAME);

    // try-with-resources closes the writer even if generating or adding a row throws
    try (CQLSSTableWriter tableWriter = CQLSSTableWriter.builder().inDirectory(tableDirectory)
            .withPartitioner(partitioner).forTable(schema).using(insertStatement).build()) {

        for (int i = 0; i < numberOfStudents; i++) {
            for (int j = 0; j < eventsPerStudent; j++) {
                StudentEvent studentEvent = RandomStudentEventGenerator.getRandomStudentEvent();

                ByteBuffer columnValue = ByteBuffer
                        .wrap(RandomStudentEventGenerator.serializeStudentEventData(studentEvent.getData()));

                tableWriter.addRow(RandomStudentEventGenerator.getRandomId(), studentEvent.getId(), columnValue);
            }
        }
    }
}

From source file:com.spotify.hdfs2cass.cassandra.cql.CrunchCqlBulkRecordWriter.java

License:Apache License

/**
 * Lazily initializes the SSTable writer and the SSTable loader; each is created
 * only while its field is still {@code null}. Any failure is rethrown wrapped in
 * a {@link CrunchRuntimeException}.
 */
private void prepareWriter() {
    try {
        if (writer == null) {
            // Configure the builder step by step, in the same order as a fluent chain would
            CQLSSTableWriter.Builder sstableBuilder = CQLSSTableWriter.builder();
            sstableBuilder.forTable(schema);
            sstableBuilder.using(insertStatement);
            sstableBuilder.withPartitioner(ConfigHelper.getOutputPartitioner(conf));
            sstableBuilder.inDirectory(outputDir);
            sstableBuilder.sorted();
            writer = sstableBuilder.build();
        }
        if (loader == null) {
            CrunchExternalClient client = new CrunchExternalClient(conf);
            // register the target table's schema with the client before handing it to the loader
            client.addKnownCfs(keyspace, schema);
            BulkRecordWriter.NullOutputHandler silentHandler = new BulkRecordWriter.NullOutputHandler();
            this.loader = new SSTableLoader(outputDir, client, silentHandler);
        }
    } catch (Exception cause) {
        throw new CrunchRuntimeException(cause);
    }
}

From source file:de.hpi.isg.mdms.hadoop.cassandra.CqlBulkRecordWriter.java

License:Apache License

/**
 * Lazily initializes the SSTable writer and the SSTable loader; each is created
 * only while its field is still {@code null}.
 *
 * <p>The writer's buffer size comes from the {@code BUFFER_SIZE_IN_MB} configuration
 * entry, falling back to 64. The loader deletes {@code outputDir} on success when
 * {@code deleteSrc} is set.
 *
 * @throws IOException wrapping any exception raised during initialization
 */
private void prepareWriter() throws IOException {
    try {
        if (writer == null) {
            // Configure the builder step by step, in the same order as a fluent chain would
            CQLSSTableWriter.Builder sstableBuilder = CQLSSTableWriter.builder();
            sstableBuilder.forTable(schema);
            sstableBuilder.using(insertStatement);
            sstableBuilder.withPartitioner(ConfigHelper.getOutputPartitioner(conf));
            sstableBuilder.inDirectory(outputDir);
            sstableBuilder.withBufferSizeInMB(Integer.parseInt(conf.get(BUFFER_SIZE_IN_MB, "64")));
            writer = sstableBuilder.build();
        }
        if (loader == null) {
            ExternalClient client = new ExternalClient(conf);
            BulkRecordWriter.NullOutputHandler silentHandler = new BulkRecordWriter.NullOutputHandler();

            this.loader = new SSTableLoader(outputDir, client, silentHandler) {
                @Override
                public void onSuccess(StreamState finalState) {
                    // clean up the locally written SSTables once streaming succeeded
                    if (deleteSrc) {
                        FileUtils.deleteRecursive(outputDir);
                    }
                }
            };
        }
    } catch (Exception cause) {
        throw new IOException(cause);
    }
}