Usage examples for the builder() method of org.apache.cassandra.io.sstable.CQLSSTableWriter
public static Builder builder()
From source file:biggraphite.BgGenerateCassandraSSTables.java
License:Apache License
public static void main(String[] args) throws IOException { if (args.length != 4) { System.out// w w w.j a v a2 s.c om .println("usage: java biggraphite.BgGenerateCassandraSSTables <KEYSPACE> <TABLE> <CQL> <CSV>"); return; } final String keyspace = args[0]; final String table = args[1]; final String schema = new String(Files.readAllBytes(Paths.get(args[2])), StandardCharsets.UTF_8); final String data = args[3]; final String insert_stmt = String.format(INSERT_STMT, keyspace, table); // magic! Config.setClientMode(true); // Create output directory that has keyspace and table name in the path File outputDir = new File(DEFAULT_OUTPUT_DIR + File.separator + keyspace + File.separator + table); if (!outputDir.exists() && !outputDir.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir); } // Prepare SSTable writer CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder(); // set output directory builder.inDirectory(outputDir).forTable(schema).using(insert_stmt) .withPartitioner(new Murmur3Partitioner()); CQLSSTableWriter writer = builder.build(); try (BufferedReader reader = new BufferedReader(new FileReader(data)); CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) { csvReader.getHeader(true); // Write to SSTable while reading data List<String> line; while ((line = csvReader.read()) != null) { // We use Java types here based on // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29 writer.addRow(UUID.fromString(line.get(0)), // metric uuid Long.parseLong(line.get(1)), // time_start_ms Short.parseShort(line.get(2)), // offset Double.parseDouble(line.get(3)), // value Integer.parseInt(line.get(4))); // count } } catch (IOException e) { e.printStackTrace(); } try { writer.close(); } catch (IOException ignore) { } }
From source file:bulkload.DVDsLoad.java
License:Apache License
public static void main(String[] args) { if (args.length == 0) { System.out.println("usage: java bulkload.DVDsLoad <list of ticker symbols>"); return;/*from ww w . j a v a2s. c o m*/ } // magic! Config.setClientMode(true); // Create output directory that has keyspace and table name in the path File outputDir = new File(DEFAULT_OUTPUT_DIR + File.separator + KEYSPACE + File.separator + TABLE); if (!outputDir.exists() && !outputDir.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir); } // Prepare SSTable writer CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder(); // set output directory builder.inDirectory(outputDir) // set target schema .forTable(SCHEMA) // set CQL statement to put data .using(INSERT_STMT) // set partitioner if needed // default is Murmur3Partitioner so set if you use different one. .withPartitioner(new Murmur3Partitioner()); CQLSSTableWriter writer = builder.build(); for (String ticker : args) { URLConnection conn; try { URL url = new URL(String.format(CSV_URL, ticker)); conn = url.openConnection(); } catch (IOException e) { throw new RuntimeException(e); } try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream())); CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) { csvReader.getHeader(true); // Write to SSTable while reading data List<String> line; while ((line = csvReader.read()) != null) { // We use Java types here based on // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29 writer.addRow(line.get(0) == null ? null : new String(line.get(0)), line.get(1) == null ? null : new String(line.get(1)), line.get(2) == null ? null : new String(line.get(2)), line.get(3) == null ? null : new String(line.get(3)), line.get(4) == null ? null : new String(line.get(4)), line.get(5) == null ? null : new String(line.get(5)), line.get(6) == null ? 
null : new String(line.get(6)), line.get(7) == null ? null : new String(line.get(7)), line.get(8) == null ? null : new String(line.get(8)), line.get(9) == null ? null : new String(line.get(9)), line.get(10) == null ? null : new String(line.get(10)), line.get(11) == null ? null : new String(line.get(11)), line.get(12) == null ? "0001-01-01" : new String(line.get(12)), line.get(13) == null ? null : new Integer(line.get(13)), line.get(14) == null ? null : new String(line.get(14))); } } catch (InvalidRequestException | IOException e) { e.printStackTrace(); } } try { writer.close(); } catch (IOException ignore) { } }
From source file:bulkload.UploadIndexes.java
License:Apache License
/**
 * Reads an inverted-index text file and writes it as Cassandra SSTables under
 * {@code Constants.KEYSPACE/Constants.CF_INDEX}.
 *
 * <p>Each usable input line is a comma-separated list: a 64-bit id followed by one or more
 * integers. The upper 32 bits of the id become {@code word_id}, the lower 32 bits become
 * {@code predicate}, and the remaining integers are sorted and stored as a list. Lines with
 * fewer than two fields are skipped. The process always terminates with
 * {@code System.exit(0)}.
 *
 * @param args a single element: the path of the inverted-index file
 */
public static void main(String[] args) {
    if (args.length != 1) {
        System.out.println(
                "usage: java bulkload.UploadIndexes /path/to/inverted_indexes.txt (see bulkload.CreateInvertedIndices)");
        return;
    }

    String inverted_indexes_file = args[0];

    // magic!
    Config.setClientMode(true);

    // create inverted indexes
    {
        File outputDir2 = new File(Constants.KEYSPACE + File.separator + Constants.CF_INDEX);
        if (!outputDir2.exists() && !outputDir2.mkdirs()) {
            throw new RuntimeException("Cannot create output directory: " + outputDir2);
        }

        // Prepare SSTable writer
        CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder();
        // set output directory
        builder.inDirectory(outputDir2)
                // set target schema
                .forTable(SCHEMA_2)
                // set CQL statement to put data
                .using(INSERT_STMT_2)
                // set partitioner if needed
                // default is Murmur3Partitioner so set if you use different one.
                .withPartitioner(new Murmur3Partitioner());

        // The writer is managed by try-with-resources so that a malformed number in the input
        // (NumberFormatException) cannot leave it unclosed.
        try (CQLSSTableWriter writer = builder.build();
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(new FileInputStream(inverted_indexes_file)))) {
            int counter = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                String[] parts = line.split(",");
                if (parts.length > 1) {
                    long id = Long.parseLong(parts[0]);
                    int word_id = (int) (id >> 32L); // upper 32 bits
                    int predicate = (int) id; // lower 32 bits

                    // parts.length > 1 guarantees at least one element ends up in the list,
                    // so no emptiness check is needed before writing the row.
                    List<Integer> intList = new ArrayList<>();
                    for (int i = 1; i < parts.length; i++) {
                        intList.add(Integer.parseInt(parts[i]));
                    }
                    Collections.sort(intList);

                    try {
                        writer.addRow(predicate, word_id, intList);
                    } catch (InvalidRequestException | IOException e) {
                        // A row that cannot be written is reported and skipped.
                        e.printStackTrace();
                    }

                    counter += 1;
                    if ((counter % 1_000_000) == 0) {
                        System.out.println(counter); // progress indicator
                    }
                }
            } // while loop
        } catch (InvalidRequestException | IOException e) {
            // Covers read failures as well as failures while closing the writer or reader.
            e.printStackTrace();
        }
    }

    System.out.println("done");
    String path = Constants.KEYSPACE + File.separator + Constants.CF_INDEX;
    System.out.println("you can upload these files to Cassandra: sstableloader -d host " + path);
    System.exit(0);
}
From source file:bulkload.UploadTuples.java
License:Apache License
public static void main(String[] args) { if (args.length != 1) { System.out.println("usage: java bulkload.UploadTuples /path/to/free_base/data/"); return;//from w w w.jav a 2 s . c o m } String Freebase_base = args[0]; // magic! Config.setClientMode(true); // create main table { // Create output directory that has keyspace and table name in the path File outputDir1 = new File(Constants.KEYSPACE + File.separator + Constants.CF_TUPLE); if (!outputDir1.exists() && !outputDir1.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir1); } // Prepare SSTable writer CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder(); // set output directory builder.inDirectory(outputDir1) // set target schema .forTable(SCHEMA_1) // set CQL statement to put data .using(INSERT_STMT_1) // set partitioner if needed // default is Murmur3Partitioner so set if you use different one. .withPartitioner(new Murmur3Partitioner()); CQLSSTableWriter writer = builder.build(); try (BufferedReader reader = new BufferedReader(new InputStreamReader( new FileInputStream(Freebase_base + File.separator + "freebase_data.txt")))) { // Write to SSTable while reading data int id_counter = 1; int hm_counter = 1; String line; while ((line = reader.readLine()) != null) { String[] parts = line.split("\\|"); if (parts.length == 3) { List<Integer> lhs = toIntList(parts[0]); List<Integer> rhs = toIntList(parts[2]); int predicate = Integer.parseInt(parts[1]); writer.addRow(id_counter, lhs, predicate, rhs); id_counter += 1; if ((id_counter % 10_000_000) == 0) { System.out.println(id_counter); } } } // while loop } catch (InvalidRequestException | IOException e) { e.printStackTrace(); } try { writer.close(); } catch (IOException ignore) { } } System.out.println("done"); String path = Constants.KEYSPACE + File.separator + Constants.CF_TUPLE; System.out.println("you can upload these files to Cassandra: sstableloader -d host " + path); System.exit(0); }
From source file:bulkload.UploadVocab.java
License:Apache License
public static void main(String[] args) throws IOException { if (args.length != 1) { System.out.println("usage: java bulkload.UploadVocab /path/to/free_base/output/"); return;//from w w w .ja v a 2 s. c o m } String Freebase_base = args[0]; // magic! Config.setClientMode(true); // create main table { // Create output directory that has Constants.KEYSPACE and table name in the path File outputDir1 = new File(Constants.KEYSPACE + File.separator + Constants.CF_VOCAB); if (!outputDir1.exists() && !outputDir1.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir1); } File outputDir2 = new File(Constants.KEYSPACE + File.separator + Constants.CF_WORD); if (!outputDir2.exists() && !outputDir2.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir2); } // read the predicate vocab map Map<Integer, String> predicateMap = new HashMap<>(); try (BufferedReader reader = new BufferedReader(new InputStreamReader( new FileInputStream(Freebase_base + File.separator + "freebase_predicate_vocab.txt")))) { String line; // predicate_str|predicate_id while ((line = reader.readLine()) != null) { String[] parts = line.split("\\|"); if (parts.length == 2) { String word = parts[0]; int word_id = Integer.parseInt(parts[1]); predicateMap.put(word_id, word); } } // while loop } catch (InvalidRequestException | IOException e) { e.printStackTrace(); } // Prepare SSTable writer CQLSSTableWriter.Builder builder_1 = CQLSSTableWriter.builder(); // set output directory builder_1.inDirectory(outputDir1) // set target schema .forTable(SCHEMA_1) // set CQL statement to put data .using(INSERT_STMT_1) // set partitioner if needed // default is Murmur3Partitioner so set if you use different one. 
.withPartitioner(new Murmur3Partitioner()); CQLSSTableWriter writer_1 = builder_1.build(); // Prepare SSTable writer CQLSSTableWriter.Builder builder_2 = CQLSSTableWriter.builder(); // set output directory builder_2.inDirectory(outputDir2) // set target schema .forTable(SCHEMA_2) // set CQL statement to put data .using(INSERT_STMT_2) // set partitioner if needed // default is Murmur3Partitioner so set if you use different one. .withPartitioner(new Murmur3Partitioner()); CQLSSTableWriter writer_2 = builder_2.build(); Map<Integer, String> vocabMap = loadVocab(Freebase_base); for (int word_id : vocabMap.keySet()) { String word = vocabMap.get(word_id); writer_1.addRow(word_id, word, predicateMap.containsKey(word_id)); writer_2.addRow(word, word_id, predicateMap.containsKey(word_id)); } try { writer_1.close(); } catch (IOException ignore) { } try { writer_2.close(); } catch (IOException ignore) { } } System.out.println("done"); String path_1 = Constants.KEYSPACE + File.separator + Constants.CF_VOCAB; System.out.println("you can upload these files to Cassandra:\nsstableloader -d host " + path_1); String path_2 = Constants.KEYSPACE + File.separator + Constants.CF_WORD; System.out.println("sstableloader -d host " + path_2); System.exit(0); }
From source file:com.criteo.biggraphite.BgGenerateCassandraSSTables.java
License:Apache License
/** * Utility to write Cassandra SSTables.// ww w.j av a 2 s. c o m * * @param args <KEYSPACE> <TABLE> <CQL> <CSV> * @throws IOException if an I/O error occurs reading from the stream */ public static void main(String[] args) throws IOException { if (args.length != 4) { System.out .println("usage: java biggraphite.BgGenerateCassandraSSTables <KEYSPACE> <TABLE> <CQL> <CSV>"); return; } final String keyspace = args[0]; final String table = args[1]; final String schema = new String(Files.readAllBytes(Paths.get(args[2])), StandardCharsets.UTF_8); final String data = args[3]; final String insert_stmt = String.format(INSERT_STMT, keyspace, table); // magic! Config.setClientMode(true); // Create output directory that has keyspace and table name in the path final File outputDir = Paths.get(DEFAULT_OUTPUT_DIR, keyspace, table).toFile(); if (!outputDir.exists() && !outputDir.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir); } // Prepare SSTable writer final CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder().inDirectory(outputDir) // the directory where to write the sstables .forTable(schema) // the schema (CREATE TABLE statement) for the table for which sstable are to be created .using(insert_stmt) // the INSERT statement defining the order of the values to add for a given CQL row .withPartitioner(new Murmur3Partitioner()); try (CQLSSTableWriter writer = builder.build(); BufferedReader reader = new BufferedReader(new FileReader(data)); CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) { // import_whisper don't generate any header, so we should NOT skip the first line //csvReader.getHeader(true); // Write to SSTable while reading data List<String> line; while ((line = csvReader.read()) != null) { // We use Java types here based on // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29 writer.addRow(UUID.fromString(line.get(0)), // metric 
uuid Long.parseLong(line.get(1)), // time_start_ms Short.parseShort(line.get(2)), // offset parseDouble(line.get(3)), // value Integer.parseInt(line.get(4))); // count } } catch (IOException e) { e.printStackTrace(); } }
From source file:com.datastax.bulkloader.CQL3BulkLoadUsers.java
public CQL3BulkLoadUsers() throws IOException { logger.info("Using CQL3 Writer"); createDirectories(keyspace, tableName); this.writer = CQLSSTableWriter.builder().forTable(schema).using(INSERT_INTO_USER_TABLE) .inDirectory(getFilePath().getAbsolutePath()).build(); }
From source file:com.knewton.mapreduce.cassandra.WriteSampleSSTable.java
License:Apache License
/**
 * Writes a sample SSTable that can be used for running the example job {@link SSTableMRExample}
 *
 * <p>For each of {@code numberOfStudents} students, {@code eventsPerStudent} random student
 * events are generated and written as (random id, event id, serialized event data) rows.
 *
 * @param args
 *            Args to be parsed
 * @throws Exception
 *             if argument parsing, row generation or writing the SSTable fails
 */
public static void main(String[] args) throws Exception {
    buildParametersFromArgs(args);

    IPartitioner partitioner = StorageService.getPartitioner();
    String schema = String.format(
            "CREATE TABLE %s.%s (studentid 'LongType', " + "eventid 'LongType'," + "data 'BytesType', "
                    + "PRIMARY KEY (studentid, eventid))" + " WITH COMPACT STORAGE",
            KEYSPACE_NAME, COLUMN_FAMILY_NAME);

    String insertStatement = String.format("INSERT INTO %s.%s (studentid, eventid, data) " + "VALUES (?, ?, ?)",
            KEYSPACE_NAME, COLUMN_FAMILY_NAME);

    // try-with-resources closes the writer even if generating or adding a row throws.
    try (CQLSSTableWriter tableWriter = CQLSSTableWriter.builder().inDirectory(tableDirectory)
            .withPartitioner(partitioner).forTable(schema).using(insertStatement).build()) {
        for (int i = 0; i < numberOfStudents; i++) {
            for (int j = 0; j < eventsPerStudent; j++) {
                StudentEvent studentEvent = RandomStudentEventGenerator.getRandomStudentEvent();

                ByteBuffer columnValue = ByteBuffer
                        .wrap(RandomStudentEventGenerator.serializeStudentEventData(studentEvent.getData()));

                tableWriter.addRow(RandomStudentEventGenerator.getRandomId(), studentEvent.getId(), columnValue);
            }
        }
    }
}
From source file:com.spotify.hdfs2cass.cassandra.cql.CrunchCqlBulkRecordWriter.java
License:Apache License
/**
 * Lazily initialises the {@code writer} and {@code loader} fields; each one is created only
 * when it is still {@code null}. Any exception raised along the way is rethrown wrapped in a
 * {@code CrunchRuntimeException}.
 */
private void prepareWriter() {
    try {
        if (writer == null) {
            CQLSSTableWriter.Builder tableBuilder = CQLSSTableWriter.builder()
                    .forTable(schema)
                    .using(insertStatement);
            tableBuilder = tableBuilder.withPartitioner(ConfigHelper.getOutputPartitioner(conf));
            writer = tableBuilder.inDirectory(outputDir).sorted().build();
        }

        // Nothing more to do once a loader exists.
        if (loader != null) {
            return;
        }

        CrunchExternalClient client = new CrunchExternalClient(conf);
        client.addKnownCfs(keyspace, schema);
        BulkRecordWriter.NullOutputHandler outputHandler = new BulkRecordWriter.NullOutputHandler();
        this.loader = new SSTableLoader(outputDir, client, outputHandler);
    } catch (Exception e) {
        throw new CrunchRuntimeException(e);
    }
}
From source file:de.hpi.isg.mdms.hadoop.cassandra.CqlBulkRecordWriter.java
License:Apache License
/**
 * Lazily initialises the {@code writer} and {@code loader} fields; each one is created only
 * when it is still {@code null}.
 *
 * <p>The writer's buffer size is read from the {@code BUFFER_SIZE_IN_MB} configuration key,
 * defaulting to {@code "64"}. The loader's success callback deletes {@code outputDir}
 * recursively when {@code deleteSrc} is set.
 *
 * @throws IOException wrapping any exception raised during initialisation
 */
private void prepareWriter() throws IOException {
    try {
        if (writer == null) {
            CQLSSTableWriter.Builder tableBuilder = CQLSSTableWriter.builder()
                    .forTable(schema)
                    .using(insertStatement);
            tableBuilder = tableBuilder
                    .withPartitioner(ConfigHelper.getOutputPartitioner(conf))
                    .inDirectory(outputDir);
            int bufferSizeMb = Integer.parseInt(conf.get(BUFFER_SIZE_IN_MB, "64"));
            writer = tableBuilder.withBufferSizeInMB(bufferSizeMb).build();
        }

        // Nothing more to do once a loader exists.
        if (loader != null) {
            return;
        }

        ExternalClient client = new ExternalClient(conf);
        BulkRecordWriter.NullOutputHandler outputHandler = new BulkRecordWriter.NullOutputHandler();
        this.loader = new SSTableLoader(outputDir, client, outputHandler) {
            @Override
            public void onSuccess(StreamState finalState) {
                if (deleteSrc) {
                    FileUtils.deleteRecursive(outputDir);
                }
            }
        };
    } catch (Exception e) {
        throw new IOException(e);
    }
}