Example usage for org.apache.cassandra.io.sstable CQLSSTableWriter builder

List of usage examples for org.apache.cassandra.io.sstable CQLSSTableWriter builder

Introduction

This page collects usage examples for the builder() method of org.apache.cassandra.io.sstable.CQLSSTableWriter.

Prototype

public static Builder builder() 

Source Link

Document

Returns a new builder for a CQLSSTableWriter.

Usage

From source file:biggraphite.BgGenerateCassandraSSTables.java

License:Apache License

/**
 * Converts a CSV file into Cassandra SSTables for the given keyspace and table.
 *
 * <p>The first CSV line is consumed as a header. Every following row is parsed as
 * (UUID, long, short, double, int) and appended through a {@code CQLSSTableWriter}.
 * The writer is closed in a {@code finally} block so that it is released even when a
 * row fails to parse with an unchecked exception.
 *
 * @param args {@code <KEYSPACE> <TABLE> <CQL> <CSV>}: keyspace name, table name, path of the
 *             file holding the table schema, and path of the CSV data file
 * @throws IOException if an I/O error escapes this method, e.g. while reading the schema file
 */
public static void main(String[] args) throws IOException {
    if (args.length != 4) {
        System.out
                .println("usage: java biggraphite.BgGenerateCassandraSSTables <KEYSPACE> <TABLE> <CQL> <CSV>");
        return;
    }
    final String keyspace = args[0];
    final String table = args[1];
    final String schema = new String(Files.readAllBytes(Paths.get(args[2])), StandardCharsets.UTF_8);
    final String data = args[3];
    final String insertStmt = String.format(INSERT_STMT, keyspace, table);

    // Switch Cassandra's Config to client mode before the writer is built.
    Config.setClientMode(true);

    // Create output directory that has keyspace and table name in the path
    File outputDir = new File(DEFAULT_OUTPUT_DIR + File.separator + keyspace + File.separator + table);
    if (!outputDir.exists() && !outputDir.mkdirs()) {
        throw new RuntimeException("Cannot create output directory: " + outputDir);
    }

    // Prepare SSTable writer
    CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder();
    builder.inDirectory(outputDir).forTable(schema).using(insertStmt)
            .withPartitioner(new Murmur3Partitioner());
    CQLSSTableWriter writer = builder.build();

    try {
        try (BufferedReader reader = new BufferedReader(new FileReader(data));
                CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) {
            // Consume the header line so it is not treated as data.
            csvReader.getHeader(true);

            // Write to SSTable while reading data
            List<String> line;
            while ((line = csvReader.read()) != null) {
                // We use Java types here based on
                // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29
                writer.addRow(UUID.fromString(line.get(0)), // metric uuid
                        Long.parseLong(line.get(1)), // time_start_ms
                        Short.parseShort(line.get(2)), // offset
                        Double.parseDouble(line.get(3)), // value
                        Integer.parseInt(line.get(4))); // count
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    } finally {
        // Always release the writer; a failed close is reported instead of being ignored
        // because it may mean the generated SSTables are incomplete.
        try {
            writer.close();
        } catch (IOException e) {
            System.err.println("Failed to close SSTable writer for " + outputDir);
            e.printStackTrace();
        }
    }
}

From source file:bulkload.DVDsLoad.java

License:Apache License

/**
 * Downloads one CSV per command-line argument and writes its rows into Cassandra SSTables.
 *
 * <p>Each argument is substituted into {@code CSV_URL}; the first line of every download is
 * consumed as a header and each following row is added as 15 column values. The writer is
 * closed in a {@code finally} block so it is released even if a download cannot be opened
 * or a row fails to parse.
 *
 * @param args the values substituted into {@code CSV_URL}; at least one is required
 */
public static void main(String[] args) {
    if (args.length == 0) {
        System.out.println("usage: java bulkload.DVDsLoad <list of ticker symbols>");
        return;
    }

    // Switch Cassandra's Config to client mode before the writer is built.
    Config.setClientMode(true);

    // Create output directory that has keyspace and table name in the path
    File outputDir = new File(DEFAULT_OUTPUT_DIR + File.separator + KEYSPACE + File.separator + TABLE);
    if (!outputDir.exists() && !outputDir.mkdirs()) {
        throw new RuntimeException("Cannot create output directory: " + outputDir);
    }

    // Prepare SSTable writer
    CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder();
    builder.inDirectory(outputDir)
            // set target schema
            .forTable(SCHEMA)
            // set CQL statement to put data
            .using(INSERT_STMT)
            // set partitioner if needed
            // default is Murmur3Partitioner so set if you use different one.
            .withPartitioner(new Murmur3Partitioner());
    CQLSSTableWriter writer = builder.build();

    try {
        for (String ticker : args) {
            URLConnection conn;
            try {
                URL url = new URL(String.format(CSV_URL, ticker));
                conn = url.openConnection();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }

            try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
                    CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) {
                // Consume the header line so it is not treated as data.
                csvReader.getHeader(true);

                // Write to SSTable while reading data
                List<String> line;
                while ((line = csvReader.read()) != null) {
                    // We use Java types here based on
                    // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29
                    // Strings are immutable, so the cells are passed as-is (null stays null).
                    String column12 = line.get(12);
                    String column13 = line.get(13);
                    writer.addRow(line.get(0),
                            line.get(1),
                            line.get(2),
                            line.get(3),
                            line.get(4),
                            line.get(5),
                            line.get(6),
                            line.get(7),
                            line.get(8),
                            line.get(9),
                            line.get(10),
                            line.get(11),
                            // a missing value in column 12 is replaced by a placeholder date
                            column12 == null ? "0001-01-01" : column12,
                            column13 == null ? null : Integer.valueOf(column13),
                            line.get(14));
                }
            } catch (InvalidRequestException | IOException e) {
                e.printStackTrace();
            }
        }
    } finally {
        // Always release the writer; a failed close is reported instead of being ignored
        // because it may mean the generated SSTables are incomplete.
        try {
            writer.close();
        } catch (IOException e) {
            System.err.println("Failed to close SSTable writer for " + outputDir);
            e.printStackTrace();
        }
    }
}

From source file:bulkload.UploadIndexes.java

License:Apache License

/**
 * Writes an inverted-index text file into Cassandra SSTables.
 *
 * <p>Each input line is split on commas. The first field is a {@code long} whose upper
 * 32 bits are used as the word id and whose lower 32 bits are used as the predicate; the
 * remaining fields are parsed as ints, sorted, and stored as a list. Lines with a single
 * field are skipped. The writer is closed in a {@code finally} block so it is released even
 * when a line fails to parse.
 *
 * @param args a single element: the path of the inverted-index file
 */
public static void main(String[] args) {
    if (args.length != 1) {
        System.out.println(
                "usage: java bulkload.UploadIndexes /path/to/inverted_indexes.txt (see bulkload.CreateInvertedIndices)");
        return;
    }

    String invertedIndexesFile = args[0];

    // Switch Cassandra's Config to client mode before the writer is built.
    Config.setClientMode(true);

    // create inverted indexes
    File outputDir = new File(Constants.KEYSPACE + File.separator + Constants.CF_INDEX);
    if (!outputDir.exists() && !outputDir.mkdirs()) {
        throw new RuntimeException("Cannot create output directory: " + outputDir);
    }

    // Prepare SSTable writer
    CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder();
    builder.inDirectory(outputDir)
            // set target schema
            .forTable(SCHEMA_2)
            // set CQL statement to put data
            .using(INSERT_STMT_2)
            // set partitioner if needed
            // default is Murmur3Partitioner so set if you use different one.
            .withPartitioner(new Murmur3Partitioner());

    CQLSSTableWriter writer = builder.build();

    try {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(invertedIndexesFile)))) {

            int counter = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                String[] parts = line.split(",");
                if (parts.length <= 1) {
                    continue;
                }

                // The key packs two ints: word id in the high half, predicate in the low half.
                long id = Long.parseLong(parts[0]);
                int wordId = (int) (id >> 32);
                int predicate = (int) id;

                // parts.length > 1 guarantees at least one element ends up in the list.
                List<Integer> intList = new ArrayList<>();
                for (int i = 1; i < parts.length; i++) {
                    intList.add(Integer.parseInt(parts[i]));
                }
                Collections.sort(intList);

                // A rejected row is reported and skipped; the remaining lines are still processed.
                try {
                    writer.addRow(predicate, wordId, intList);
                } catch (InvalidRequestException | IOException e) {
                    e.printStackTrace();
                }

                counter += 1;
                if ((counter % 1_000_000) == 0) {
                    System.out.println(counter);
                }
            }
        } catch (InvalidRequestException | IOException e) {
            e.printStackTrace();
        }
    } finally {
        // Always release the writer; a failed close is reported instead of being ignored
        // because it may mean the generated SSTables are incomplete.
        try {
            writer.close();
        } catch (IOException e) {
            System.err.println("Failed to close SSTable writer for " + outputDir);
            e.printStackTrace();
        }
    }

    System.out.println("done");
    String path = Constants.KEYSPACE + File.separator + Constants.CF_INDEX;
    System.out.println("you can upload these files to Cassandra: sstableloader -d host " + path);
    System.exit(0);
}

From source file:bulkload.UploadTuples.java

License:Apache License

/**
 * Writes the tuples found in {@code freebase_data.txt} into Cassandra SSTables.
 *
 * <p>Each input line is split on {@code |}. Lines with exactly three fields are stored as
 * (running id, left-hand list, predicate, right-hand list); all other lines are skipped.
 * The writer is closed in a {@code finally} block so it is released even when a line fails
 * to parse.
 *
 * @param args a single element: the directory that contains {@code freebase_data.txt}
 */
public static void main(String[] args) {
    if (args.length != 1) {
        System.out.println("usage: java bulkload.UploadTuples /path/to/free_base/data/");
        return;
    }

    String freebaseBase = args[0];

    // Switch Cassandra's Config to client mode before the writer is built.
    Config.setClientMode(true);

    // Create output directory that has keyspace and table name in the path
    File outputDir = new File(Constants.KEYSPACE + File.separator + Constants.CF_TUPLE);
    if (!outputDir.exists() && !outputDir.mkdirs()) {
        throw new RuntimeException("Cannot create output directory: " + outputDir);
    }

    // Prepare SSTable writer
    CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder();
    builder.inDirectory(outputDir)
            // set target schema
            .forTable(SCHEMA_1)
            // set CQL statement to put data
            .using(INSERT_STMT_1)
            // set partitioner if needed
            // default is Murmur3Partitioner so set if you use different one.
            .withPartitioner(new Murmur3Partitioner());

    CQLSSTableWriter writer = builder.build();

    try {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(freebaseBase + File.separator + "freebase_data.txt")))) {

            // Write to SSTable while reading data
            int idCounter = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                String[] parts = line.split("\\|");
                if (parts.length != 3) {
                    continue;
                }

                List<Integer> lhs = toIntList(parts[0]);
                List<Integer> rhs = toIntList(parts[2]);
                int predicate = Integer.parseInt(parts[1]);

                writer.addRow(idCounter, lhs, predicate, rhs);

                idCounter += 1;

                // progress output
                if ((idCounter % 10_000_000) == 0) {
                    System.out.println(idCounter);
                }
            }
        } catch (InvalidRequestException | IOException e) {
            e.printStackTrace();
        }
    } finally {
        // Always release the writer; a failed close is reported instead of being ignored
        // because it may mean the generated SSTables are incomplete.
        try {
            writer.close();
        } catch (IOException e) {
            System.err.println("Failed to close SSTable writer for " + outputDir);
            e.printStackTrace();
        }
    }

    System.out.println("done");
    String path = Constants.KEYSPACE + File.separator + Constants.CF_TUPLE;
    System.out.println("you can upload these files to Cassandra: sstableloader -d host " + path);
    System.exit(0);
}

From source file:bulkload.UploadVocab.java

License:Apache License

/**
 * Writes the vocabulary into two sets of Cassandra SSTables, one per table.
 *
 * <p>The predicate vocabulary is read from {@code freebase_predicate_vocab.txt}
 * ({@code predicate_str|predicate_id} per line). For every entry returned by
 * {@code loadVocab}, one row (id, word, isPredicate) is added through the first writer and
 * one row (word, id, isPredicate) through the second. Both writers are closed in
 * {@code finally} blocks so they are released even when adding a row fails.
 *
 * @param args a single element: the directory that contains the vocabulary files
 * @throws IOException if an I/O error escapes this method, e.g. while adding rows
 */
public static void main(String[] args) throws IOException {

    if (args.length != 1) {
        System.out.println("usage: java bulkload.UploadVocab /path/to/free_base/output/");
        return;
    }

    String freebaseBase = args[0];

    // Switch Cassandra's Config to client mode before the writers are built.
    Config.setClientMode(true);

    // Create output directories that have Constants.KEYSPACE and table name in the path
    File outputDir1 = new File(Constants.KEYSPACE + File.separator + Constants.CF_VOCAB);
    if (!outputDir1.exists() && !outputDir1.mkdirs()) {
        throw new RuntimeException("Cannot create output directory: " + outputDir1);
    }

    File outputDir2 = new File(Constants.KEYSPACE + File.separator + Constants.CF_WORD);
    if (!outputDir2.exists() && !outputDir2.mkdirs()) {
        throw new RuntimeException("Cannot create output directory: " + outputDir2);
    }

    // read the predicate vocab map
    Map<Integer, String> predicateMap = new HashMap<>();
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
            new FileInputStream(freebaseBase + File.separator + "freebase_predicate_vocab.txt")))) {
        String line;
        // predicate_str|predicate_id
        while ((line = reader.readLine()) != null) {
            String[] parts = line.split("\\|");
            if (parts.length == 2) {
                String word = parts[0];
                int wordId = Integer.parseInt(parts[1]);
                predicateMap.put(wordId, word);
            }
        }
    } catch (InvalidRequestException | IOException e) {
        e.printStackTrace();
    }

    // Prepare first SSTable writer
    CQLSSTableWriter.Builder builder1 = CQLSSTableWriter.builder();
    builder1.inDirectory(outputDir1)
            // set target schema
            .forTable(SCHEMA_1)
            // set CQL statement to put data
            .using(INSERT_STMT_1)
            // set partitioner if needed
            // default is Murmur3Partitioner so set if you use different one.
            .withPartitioner(new Murmur3Partitioner());

    CQLSSTableWriter writer1 = builder1.build();
    try {
        // Prepare second SSTable writer
        CQLSSTableWriter.Builder builder2 = CQLSSTableWriter.builder();
        builder2.inDirectory(outputDir2)
                // set target schema
                .forTable(SCHEMA_2)
                // set CQL statement to put data
                .using(INSERT_STMT_2)
                // set partitioner if needed
                // default is Murmur3Partitioner so set if you use different one.
                .withPartitioner(new Murmur3Partitioner());

        CQLSSTableWriter writer2 = builder2.build();
        try {
            Map<Integer, String> vocabMap = loadVocab(freebaseBase);
            for (Map.Entry<Integer, String> entry : vocabMap.entrySet()) {
                int wordId = entry.getKey();
                String word = entry.getValue();
                // A word counts as a predicate when its id appears in the predicate vocab.
                boolean isPredicate = predicateMap.containsKey(wordId);
                writer1.addRow(wordId, word, isPredicate);
                writer2.addRow(word, wordId, isPredicate);
            }
        } finally {
            // A failed close is reported instead of being ignored because it may mean
            // the generated SSTables are incomplete.
            try {
                writer2.close();
            } catch (IOException e) {
                System.err.println("Failed to close SSTable writer for " + outputDir2);
                e.printStackTrace();
            }
        }
    } finally {
        try {
            writer1.close();
        } catch (IOException e) {
            System.err.println("Failed to close SSTable writer for " + outputDir1);
            e.printStackTrace();
        }
    }

    System.out.println("done");
    String path1 = Constants.KEYSPACE + File.separator + Constants.CF_VOCAB;
    System.out.println("you can upload these files to Cassandra:\nsstableloader -d host " + path1);

    String path2 = Constants.KEYSPACE + File.separator + Constants.CF_WORD;
    System.out.println("sstableloader -d host " + path2);

    System.exit(0);
}

From source file:com.criteo.biggraphite.BgGenerateCassandraSSTables.java

License:Apache License

/**
 * Command-line entry point that turns a CSV file into Cassandra SSTables.
 *
 * @param args {@code <KEYSPACE> <TABLE> <CQL> <CSV>}
 * @throws IOException if an I/O error occurs reading from the stream
 */
public static void main(String[] args) throws IOException {
    if (args.length != 4) {
        System.out
                .println("usage: java biggraphite.BgGenerateCassandraSSTables <KEYSPACE> <TABLE> <CQL> <CSV>");
        return;
    }

    final String keyspaceName = args[0];
    final String tableName = args[1];
    final byte[] schemaBytes = Files.readAllBytes(Paths.get(args[2]));
    final String createTableCql = new String(schemaBytes, StandardCharsets.UTF_8);
    final String csvPath = args[3];
    final String insertCql = String.format(INSERT_STMT, keyspaceName, tableName);

    // magic!
    Config.setClientMode(true);

    // The output path is <DEFAULT_OUTPUT_DIR>/<keyspace>/<table>; create it when missing.
    final File targetDir = Paths.get(DEFAULT_OUTPUT_DIR, keyspaceName, tableName).toFile();
    if (!(targetDir.exists() || targetDir.mkdirs())) {
        throw new RuntimeException("Cannot create output directory: " + targetDir);
    }

    // Configure the SSTable writer step by step.
    final CQLSSTableWriter.Builder sstableBuilder = CQLSSTableWriter.builder();
    // directory the sstables are written to
    sstableBuilder.inDirectory(targetDir);
    // CREATE TABLE statement of the table the sstables are generated for
    sstableBuilder.forTable(createTableCql);
    // INSERT statement that fixes the order of the values passed per row
    sstableBuilder.using(insertCql);
    sstableBuilder.withPartitioner(new Murmur3Partitioner());

    try (CQLSSTableWriter sstableWriter = sstableBuilder.build();
            BufferedReader fileReader = new BufferedReader(new FileReader(csvPath));
            CsvListReader csv = new CsvListReader(fileReader, CsvPreference.STANDARD_PREFERENCE)) {
        // The input has no header row (import_whisper does not emit one),
        // so the very first line is already data and nothing is skipped.

        // Java types are chosen according to
        // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29
        for (List<String> row = csv.read(); row != null; row = csv.read()) {
            sstableWriter.addRow(UUID.fromString(row.get(0)), // metric uuid
                    Long.parseLong(row.get(1)), // time_start_ms
                    Short.parseShort(row.get(2)), // offset
                    parseDouble(row.get(3)), // value
                    Integer.parseInt(row.get(4))); // count
        }
    } catch (IOException ioFailure) {
        ioFailure.printStackTrace();
    }
}

From source file:com.datastax.bulkloader.CQL3BulkLoadUsers.java

public CQL3BulkLoadUsers() throws IOException {

    logger.info("Using CQL3 Writer");

    createDirectories(keyspace, tableName);

    this.writer = CQLSSTableWriter.builder().forTable(schema).using(INSERT_INTO_USER_TABLE)
            .inDirectory(getFilePath().getAbsolutePath()).build();
}

From source file:com.knewton.mapreduce.cassandra.WriteSampleSSTable.java

License:Apache License

/**
 * Writes a sample SSTable that can be used for running the example job {@link SSTableMRExample}.
 *
 * <p>For each of {@code numberOfStudents} students, {@code eventsPerStudent} random events
 * are generated and added as (random id, event id, serialized event data) rows. The writer
 * is closed in a {@code finally} block so it is released even when generating or adding a
 * row fails.
 *
 * @param args
 *            Args to be parsed
 * @throws Exception
 *             if argument parsing, row generation, or writing the SSTable fails
 */
public static void main(String[] args) throws Exception {
    buildParametersFromArgs(args);

    IPartitioner partitioner = StorageService.getPartitioner();
    String schema = String.format(
            "CREATE TABLE %s.%s (studentid 'LongType', " + "eventid 'LongType'," + "data 'BytesType', "
                    + "PRIMARY KEY (studentid, eventid))" + " WITH COMPACT STORAGE",
            KEYSPACE_NAME, COLUMN_FAMILY_NAME);

    String insertStatement = String.format("INSERT INTO %s.%s (studentid, eventid, data) " + "VALUES (?, ?, ?)",
            KEYSPACE_NAME, COLUMN_FAMILY_NAME);

    CQLSSTableWriter tableWriter = CQLSSTableWriter.builder().inDirectory(tableDirectory)
            .withPartitioner(partitioner).forTable(schema).using(insertStatement).build();

    try {
        for (int i = 0; i < numberOfStudents; i++) {
            for (int j = 0; j < eventsPerStudent; j++) {
                StudentEvent studentEvent = RandomStudentEventGenerator.getRandomStudentEvent();

                ByteBuffer columnValue = ByteBuffer
                        .wrap(RandomStudentEventGenerator.serializeStudentEventData(studentEvent.getData()));

                tableWriter.addRow(RandomStudentEventGenerator.getRandomId(), studentEvent.getId(), columnValue);
            }
        }
    } finally {
        // Release the writer on both the success and the failure path.
        tableWriter.close();
    }
}

From source file:com.spotify.hdfs2cass.cassandra.cql.CrunchCqlBulkRecordWriter.java

License:Apache License

/**
 * Lazily creates the SSTable writer and the SSTable loader if they do not exist yet.
 *
 * <p>Any exception raised while creating either of them is rethrown wrapped in a
 * {@code CrunchRuntimeException}.
 */
private void prepareWriter() {
    try {
        if (writer == null) {
            writer = CQLSSTableWriter.builder()
                    .forTable(schema)
                    .using(insertStatement)
                    .withPartitioner(ConfigHelper.getOutputPartitioner(conf))
                    .inDirectory(outputDir)
                    .sorted()
                    .build();
        }
        if (loader == null) {
            // The client is told about the target table before the loader is created.
            CrunchExternalClient client = new CrunchExternalClient(conf);
            client.addKnownCfs(keyspace, schema);
            this.loader = new SSTableLoader(
                    outputDir,
                    client,
                    new BulkRecordWriter.NullOutputHandler());
        }
    } catch (Exception cause) {
        throw new CrunchRuntimeException(cause);
    }
}

From source file:de.hpi.isg.mdms.hadoop.cassandra.CqlBulkRecordWriter.java

License:Apache License

/**
 * Lazily creates the SSTable writer and the SSTable loader if they do not exist yet.
 *
 * <p>The writer's buffer size is read from the configuration key {@code BUFFER_SIZE_IN_MB}
 * with {@code "64"} as the fallback. The loader deletes {@code outputDir} recursively on
 * success when {@code deleteSrc} is set.
 *
 * @throws IOException wrapping any exception raised while creating the writer or loader
 */
private void prepareWriter() throws IOException {
    try {
        if (writer == null) {
            CQLSSTableWriter.Builder sstableBuilder = CQLSSTableWriter.builder()
                    .forTable(schema)
                    .using(insertStatement)
                    .withPartitioner(ConfigHelper.getOutputPartitioner(conf))
                    .inDirectory(outputDir);
            int bufferSizeInMb = Integer.parseInt(conf.get(BUFFER_SIZE_IN_MB, "64"));
            writer = sstableBuilder.withBufferSizeInMB(bufferSizeInMb).build();
        }
        if (loader == null) {
            ExternalClient client = new ExternalClient(conf);

            this.loader = new SSTableLoader(outputDir, client, new BulkRecordWriter.NullOutputHandler()) {
                @Override
                public void onSuccess(StreamState finalState) {
                    // Remove the locally generated files once streaming has succeeded.
                    if (deleteSrc) {
                        FileUtils.deleteRecursive(outputDir);
                    }
                }
            };
        }
    } catch (Exception cause) {
        throw new IOException(cause);
    }
}