List of usage examples for the Murmur3Partitioner() constructor of org.apache.cassandra.dht.Murmur3Partitioner
Murmur3Partitioner
From source file:biggraphite.BgGenerateCassandraSSTables.java
License:Apache License
public static void main(String[] args) throws IOException { if (args.length != 4) { System.out//from w w w . j av a2s . c om .println("usage: java biggraphite.BgGenerateCassandraSSTables <KEYSPACE> <TABLE> <CQL> <CSV>"); return; } final String keyspace = args[0]; final String table = args[1]; final String schema = new String(Files.readAllBytes(Paths.get(args[2])), StandardCharsets.UTF_8); final String data = args[3]; final String insert_stmt = String.format(INSERT_STMT, keyspace, table); // magic! Config.setClientMode(true); // Create output directory that has keyspace and table name in the path File outputDir = new File(DEFAULT_OUTPUT_DIR + File.separator + keyspace + File.separator + table); if (!outputDir.exists() && !outputDir.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir); } // Prepare SSTable writer CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder(); // set output directory builder.inDirectory(outputDir).forTable(schema).using(insert_stmt) .withPartitioner(new Murmur3Partitioner()); CQLSSTableWriter writer = builder.build(); try (BufferedReader reader = new BufferedReader(new FileReader(data)); CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) { csvReader.getHeader(true); // Write to SSTable while reading data List<String> line; while ((line = csvReader.read()) != null) { // We use Java types here based on // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29 writer.addRow(UUID.fromString(line.get(0)), // metric uuid Long.parseLong(line.get(1)), // time_start_ms Short.parseShort(line.get(2)), // offset Double.parseDouble(line.get(3)), // value Integer.parseInt(line.get(4))); // count } } catch (IOException e) { e.printStackTrace(); } try { writer.close(); } catch (IOException ignore) { } }
From source file:bulkload.DVDsLoad.java
License:Apache License
public static void main(String[] args) { if (args.length == 0) { System.out.println("usage: java bulkload.DVDsLoad <list of ticker symbols>"); return;/*from w w w.j a v a 2 s .c o m*/ } // magic! Config.setClientMode(true); // Create output directory that has keyspace and table name in the path File outputDir = new File(DEFAULT_OUTPUT_DIR + File.separator + KEYSPACE + File.separator + TABLE); if (!outputDir.exists() && !outputDir.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir); } // Prepare SSTable writer CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder(); // set output directory builder.inDirectory(outputDir) // set target schema .forTable(SCHEMA) // set CQL statement to put data .using(INSERT_STMT) // set partitioner if needed // default is Murmur3Partitioner so set if you use different one. .withPartitioner(new Murmur3Partitioner()); CQLSSTableWriter writer = builder.build(); for (String ticker : args) { URLConnection conn; try { URL url = new URL(String.format(CSV_URL, ticker)); conn = url.openConnection(); } catch (IOException e) { throw new RuntimeException(e); } try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream())); CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) { csvReader.getHeader(true); // Write to SSTable while reading data List<String> line; while ((line = csvReader.read()) != null) { // We use Java types here based on // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29 writer.addRow(line.get(0) == null ? null : new String(line.get(0)), line.get(1) == null ? null : new String(line.get(1)), line.get(2) == null ? null : new String(line.get(2)), line.get(3) == null ? null : new String(line.get(3)), line.get(4) == null ? null : new String(line.get(4)), line.get(5) == null ? null : new String(line.get(5)), line.get(6) == null ? 
null : new String(line.get(6)), line.get(7) == null ? null : new String(line.get(7)), line.get(8) == null ? null : new String(line.get(8)), line.get(9) == null ? null : new String(line.get(9)), line.get(10) == null ? null : new String(line.get(10)), line.get(11) == null ? null : new String(line.get(11)), line.get(12) == null ? "0001-01-01" : new String(line.get(12)), line.get(13) == null ? null : new Integer(line.get(13)), line.get(14) == null ? null : new String(line.get(14))); } } catch (InvalidRequestException | IOException e) { e.printStackTrace(); } } try { writer.close(); } catch (IOException ignore) { } }
From source file:bulkload.UploadIndexes.java
License:Apache License
/**
 * Reads an inverted-index text file and writes its entries into Cassandra SSTables.
 *
 * <p>Each input line is a comma-separated list. The first field is a 64-bit id whose upper
 * 32 bits are used as the word id and whose lower 32 bits are used as the predicate; the
 * remaining fields are integers that are sorted and stored as a list. Lines with a single field
 * are skipped.
 *
 * <p>The writer is closed in a {@code finally} block so it is released even when a malformed
 * number aborts the loop, and a failure to close is reported on stderr.
 *
 * @param args a single element: the path of the inverted-index file
 */
public static void main(String[] args) {
    if (args.length != 1) {
        System.out.println(
                "usage: java bulkload.UploadIndexes /path/to/inverted_indexes.txt (see bulkload.CreateInvertedIndices)");
        return;
    }
    String inverted_indexes_file = args[0];

    // magic!
    Config.setClientMode(true);

    // create inverted indexes
    {
        File outputDir2 = new File(Constants.KEYSPACE + File.separator + Constants.CF_INDEX);
        if (!outputDir2.exists() && !outputDir2.mkdirs()) {
            throw new RuntimeException("Cannot create output directory: " + outputDir2);
        }

        // Prepare SSTable writer
        CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder();
        // set output directory
        builder.inDirectory(outputDir2)
                // set target schema
                .forTable(SCHEMA_2)
                // set CQL statement to put data
                .using(INSERT_STMT_2)
                // set partitioner if needed
                // default is Murmur3Partitioner so set if you use different one.
                .withPartitioner(new Murmur3Partitioner());
        CQLSSTableWriter writer = builder.build();

        try {
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(inverted_indexes_file)))) {
                int counter = 0;
                String line;
                while ((line = reader.readLine()) != null) {
                    String[] parts = line.split(",");
                    if (parts.length > 1) {
                        long id = Long.parseLong(parts[0]);
                        // upper 32 bits -> word id, lower 32 bits -> predicate
                        long l_word_id = id >> 32L;
                        int word_id = (int) l_word_id;
                        int predicate = (int) id;

                        List<Integer> intList = new ArrayList<>();
                        for (int i = 1; i < parts.length; i++) {
                            intList.add(Integer.parseInt(parts[i]));
                        }
                        Collections.sort(intList);

                        if (!intList.isEmpty()) {
                            try {
                                writer.addRow(predicate, word_id, intList);
                            } catch (InvalidRequestException | IOException e) {
                                // Report the bad row and keep going with the next one.
                                e.printStackTrace();
                            }
                        }

                        // progress indicator
                        counter += 1;
                        if ((counter % 1_000_000) == 0) {
                            System.out.println(counter);
                        }
                    }
                } // while loop
            } catch (InvalidRequestException | IOException e) {
                e.printStackTrace();
            }
        } finally {
            try {
                writer.close();
            } catch (IOException e) {
                // A failed close may mean the last SSTable was not fully written; make it visible.
                System.err.println("Failed to close SSTable writer for " + outputDir2);
                e.printStackTrace();
            }
        }
    }

    System.out.println("done");
    String path = Constants.KEYSPACE + File.separator + Constants.CF_INDEX;
    System.out.println("you can upload these files to Cassandra: sstableloader -d host " + path);
    System.exit(0);
}
From source file:bulkload.UploadTuples.java
License:Apache License
public static void main(String[] args) { if (args.length != 1) { System.out.println("usage: java bulkload.UploadTuples /path/to/free_base/data/"); return;//from w w w .j ava2s . c o m } String Freebase_base = args[0]; // magic! Config.setClientMode(true); // create main table { // Create output directory that has keyspace and table name in the path File outputDir1 = new File(Constants.KEYSPACE + File.separator + Constants.CF_TUPLE); if (!outputDir1.exists() && !outputDir1.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir1); } // Prepare SSTable writer CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder(); // set output directory builder.inDirectory(outputDir1) // set target schema .forTable(SCHEMA_1) // set CQL statement to put data .using(INSERT_STMT_1) // set partitioner if needed // default is Murmur3Partitioner so set if you use different one. .withPartitioner(new Murmur3Partitioner()); CQLSSTableWriter writer = builder.build(); try (BufferedReader reader = new BufferedReader(new InputStreamReader( new FileInputStream(Freebase_base + File.separator + "freebase_data.txt")))) { // Write to SSTable while reading data int id_counter = 1; int hm_counter = 1; String line; while ((line = reader.readLine()) != null) { String[] parts = line.split("\\|"); if (parts.length == 3) { List<Integer> lhs = toIntList(parts[0]); List<Integer> rhs = toIntList(parts[2]); int predicate = Integer.parseInt(parts[1]); writer.addRow(id_counter, lhs, predicate, rhs); id_counter += 1; if ((id_counter % 10_000_000) == 0) { System.out.println(id_counter); } } } // while loop } catch (InvalidRequestException | IOException e) { e.printStackTrace(); } try { writer.close(); } catch (IOException ignore) { } } System.out.println("done"); String path = Constants.KEYSPACE + File.separator + Constants.CF_TUPLE; System.out.println("you can upload these files to Cassandra: sstableloader -d host " + path); System.exit(0); }
From source file:bulkload.UploadVocab.java
License:Apache License
public static void main(String[] args) throws IOException { if (args.length != 1) { System.out.println("usage: java bulkload.UploadVocab /path/to/free_base/output/"); return;/*from w ww. j av a 2s . c o m*/ } String Freebase_base = args[0]; // magic! Config.setClientMode(true); // create main table { // Create output directory that has Constants.KEYSPACE and table name in the path File outputDir1 = new File(Constants.KEYSPACE + File.separator + Constants.CF_VOCAB); if (!outputDir1.exists() && !outputDir1.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir1); } File outputDir2 = new File(Constants.KEYSPACE + File.separator + Constants.CF_WORD); if (!outputDir2.exists() && !outputDir2.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir2); } // read the predicate vocab map Map<Integer, String> predicateMap = new HashMap<>(); try (BufferedReader reader = new BufferedReader(new InputStreamReader( new FileInputStream(Freebase_base + File.separator + "freebase_predicate_vocab.txt")))) { String line; // predicate_str|predicate_id while ((line = reader.readLine()) != null) { String[] parts = line.split("\\|"); if (parts.length == 2) { String word = parts[0]; int word_id = Integer.parseInt(parts[1]); predicateMap.put(word_id, word); } } // while loop } catch (InvalidRequestException | IOException e) { e.printStackTrace(); } // Prepare SSTable writer CQLSSTableWriter.Builder builder_1 = CQLSSTableWriter.builder(); // set output directory builder_1.inDirectory(outputDir1) // set target schema .forTable(SCHEMA_1) // set CQL statement to put data .using(INSERT_STMT_1) // set partitioner if needed // default is Murmur3Partitioner so set if you use different one. 
.withPartitioner(new Murmur3Partitioner()); CQLSSTableWriter writer_1 = builder_1.build(); // Prepare SSTable writer CQLSSTableWriter.Builder builder_2 = CQLSSTableWriter.builder(); // set output directory builder_2.inDirectory(outputDir2) // set target schema .forTable(SCHEMA_2) // set CQL statement to put data .using(INSERT_STMT_2) // set partitioner if needed // default is Murmur3Partitioner so set if you use different one. .withPartitioner(new Murmur3Partitioner()); CQLSSTableWriter writer_2 = builder_2.build(); Map<Integer, String> vocabMap = loadVocab(Freebase_base); for (int word_id : vocabMap.keySet()) { String word = vocabMap.get(word_id); writer_1.addRow(word_id, word, predicateMap.containsKey(word_id)); writer_2.addRow(word, word_id, predicateMap.containsKey(word_id)); } try { writer_1.close(); } catch (IOException ignore) { } try { writer_2.close(); } catch (IOException ignore) { } } System.out.println("done"); String path_1 = Constants.KEYSPACE + File.separator + Constants.CF_VOCAB; System.out.println("you can upload these files to Cassandra:\nsstableloader -d host " + path_1); String path_2 = Constants.KEYSPACE + File.separator + Constants.CF_WORD; System.out.println("sstableloader -d host " + path_2); System.exit(0); }
From source file:com.criteo.biggraphite.BgGenerateCassandraSSTables.java
License:Apache License
/** * Utility to write Cassandra SSTables./*from w w w.j av a2s. c o m*/ * * @param args <KEYSPACE> <TABLE> <CQL> <CSV> * @throws IOException if an I/O error occurs reading from the stream */ public static void main(String[] args) throws IOException { if (args.length != 4) { System.out .println("usage: java biggraphite.BgGenerateCassandraSSTables <KEYSPACE> <TABLE> <CQL> <CSV>"); return; } final String keyspace = args[0]; final String table = args[1]; final String schema = new String(Files.readAllBytes(Paths.get(args[2])), StandardCharsets.UTF_8); final String data = args[3]; final String insert_stmt = String.format(INSERT_STMT, keyspace, table); // magic! Config.setClientMode(true); // Create output directory that has keyspace and table name in the path final File outputDir = Paths.get(DEFAULT_OUTPUT_DIR, keyspace, table).toFile(); if (!outputDir.exists() && !outputDir.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir); } // Prepare SSTable writer final CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder().inDirectory(outputDir) // the directory where to write the sstables .forTable(schema) // the schema (CREATE TABLE statement) for the table for which sstable are to be created .using(insert_stmt) // the INSERT statement defining the order of the values to add for a given CQL row .withPartitioner(new Murmur3Partitioner()); try (CQLSSTableWriter writer = builder.build(); BufferedReader reader = new BufferedReader(new FileReader(data)); CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) { // import_whisper don't generate any header, so we should NOT skip the first line //csvReader.getHeader(true); // Write to SSTable while reading data List<String> line; while ((line = csvReader.read()) != null) { // We use Java types here based on // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29 writer.addRow(UUID.fromString(line.get(0)), // 
metric uuid Long.parseLong(line.get(1)), // time_start_ms Short.parseShort(line.get(2)), // offset parseDouble(line.get(3)), // value Integer.parseInt(line.get(4))); // count } } catch (IOException e) { e.printStackTrace(); } }
From source file:com.stratio.deep.cassandra.cql.RangeUtilsTest.java
License:Apache License
/**
 * Fetches tokens through {@code mockSession1} and checks that two hosts are returned, each with
 * 16 tokens that, once sorted, match the expected {@code localTokens1} / {@code remoteTokens1}
 * fixtures parsed as longs.
 */
@Test
public void testFetchSortedTokens1() {
    // Shared converter turning the textual fixture tokens into longs.
    Function<String, Long> parseToken = new Function<String, Long>() {
        @Nullable
        @Override
        public Long apply(@Nullable String input) {
            return Long.parseLong(input);
        }
    };

    Map<String, Iterable<Comparable>> tokensByHost = RangeUtils.fetchTokens("none",
            Pair.create(mockSession1, "localhost"), new Murmur3Partitioner());
    assertEquals(tokensByHost.size(), 2);

    // --- local host ---
    List<Comparable> actualLocal = Lists.newArrayList(tokensByHost.get("localhost"));
    assertNotNull(actualLocal);
    assertEquals(actualLocal.size(), 16);

    Iterable expectedLocal = Ordering.natural()
            .immutableSortedCopy(Iterables.transform(localTokens1, parseToken));
    assertTrue(Iterables.elementsEqual(Ordering.natural().sortedCopy(actualLocal), expectedLocal));

    // --- remote host ---
    List<Comparable> actualRemote = Lists.newArrayList(tokensByHost.get("fake-hostname"));
    assertNotNull(actualRemote);
    assertEquals(actualRemote.size(), 16);

    Iterable expectedRemote = Ordering.natural()
            .immutableSortedCopy(Iterables.transform(remoteTokens1, parseToken));
    assertTrue(Iterables.elementsEqual(Ordering.natural().sortedCopy(expectedRemote),
            Ordering.natural().sortedCopy(actualRemote)));
}
From source file:com.stratio.deep.cassandra.cql.RangeUtilsTest.java
License:Apache License
/**
 * Fetches tokens through {@code mockSession2} and checks that two hosts are returned, with 255
 * local and 256 remote tokens that, once sorted, match the expected {@code localTokens2} /
 * {@code remoteTokens2} fixtures parsed as longs.
 */
@Test
public void testFetchSortedTokens2() {
    // Shared converter turning the textual fixture tokens into longs.
    Function<String, Long> parseToken = new Function<String, Long>() {
        @Nullable
        @Override
        public Long apply(@Nullable String input) {
            return Long.parseLong(input);
        }
    };

    Map<String, Iterable<Comparable>> tokensByHost = RangeUtils.fetchTokens("none",
            Pair.create(mockSession2, "localhost"), new Murmur3Partitioner());
    assertEquals(tokensByHost.size(), 2);

    // --- local host ---
    List<Comparable> actualLocal = Lists.newArrayList(tokensByHost.get("localhost"));
    assertNotNull(actualLocal);
    assertEquals(actualLocal.size(), 255);

    Iterable expectedLocal = Ordering.natural()
            .immutableSortedCopy(Iterables.transform(localTokens2, parseToken));
    assertTrue(Iterables.elementsEqual(Ordering.natural().sortedCopy(actualLocal), expectedLocal));

    // --- remote host ---
    List<Comparable> actualRemote = Lists.newArrayList(tokensByHost.get("fake-hostname"));
    assertNotNull(actualRemote);
    assertEquals(actualRemote.size(), 256);

    Iterable expectedRemote = Ordering.natural()
            .immutableSortedCopy(Iterables.transform(remoteTokens2, parseToken));
    assertTrue(Iterables.elementsEqual(Ordering.natural().sortedCopy(expectedRemote),
            Ordering.natural().sortedCopy(actualRemote)));
}
From source file:dk.dma.ais.store.importer.SSTableWriter.java
License:Open Source License
/**
 * Returns the SSTable writer held in the {@code writer} field, creating it on first use.
 *
 * <p>On the first call the output directories for {@code writePath} are created, the target path
 * is logged, and a writer with a 256 MB buffer and a {@link Murmur3Partitioner} is built from
 * {@code schemaDefinition} and {@code insertStatement}. No synchronization is performed here.
 *
 * @return the cached writer instance
 */
protected final CQLSSTableWriter writer() {
    if (writer != null) {
        return writer;
    }

    createDirectories(writePath);
    LOG.info("Writing output to: " + writePath);

    CQLSSTableWriter.Builder config = CQLSSTableWriter.builder();
    config = config.inDirectory(writePath.toString());
    config = config.forTable(schemaDefinition);
    config = config.withBufferSizeInMB(256);
    config = config.using(insertStatement);
    config = config.withPartitioner(new Murmur3Partitioner());
    writer = config.build();

    return writer;
}
From source file:hello.BulkLoad.java
License:Apache License
public static void main(String[] args) { if (args.length == 0) { System.out.println("usage: java bulkload.BulkLoad <list of ticker symbols>"); return;/*from w w w . j a va2s. c o m*/ } // magic! Config.setClientMode(true); // Create output directory that has keyspace and table name in the path File outputDir = new File(DEFAULT_OUTPUT_DIR + File.separator + KEYSPACE + File.separator + TABLE); if (!outputDir.exists() && !outputDir.mkdirs()) { throw new RuntimeException("Cannot create output directory: " + outputDir); } // Prepare SSTable writer CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder(); // set output directory builder.inDirectory(outputDir) // set target schema .forTable(SCHEMA) // set CQL statement to put data .using(INSERT_STMT) // set partitioner if needed // default is Murmur3Partitioner so set if you use different one. .withPartitioner(new Murmur3Partitioner()); CQLSSTableWriter writer = builder.build(); for (String ticker : args) { HttpURLConnection conn; try { String u = String.format(CSV_URL, ticker); System.out.println(u); URL url = new URL(u); conn = (HttpURLConnection) url.openConnection(); } catch (IOException e) { throw new RuntimeException(e); } try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream())); CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE)) { if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) { System.out.println("Historical data not found for " + ticker); continue; } csvReader.getHeader(true); // Write to SSTable while reading data List<String> line; while ((line = csvReader.read()) != null) { System.out.println(line); // We use Java types here based on // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29 writer.addRow(ticker, DATE_FORMAT.parse(line.get(0)), new BigDecimal(line.get(1)), new BigDecimal(line.get(2)), new BigDecimal(line.get(3)), new BigDecimal(line.get(4)), 
Long.parseLong(line.get(5)), new BigDecimal(line.get(6))); } } catch (InvalidRequestException | ParseException | IOException e) { e.printStackTrace(); } } try { writer.close(); } catch (IOException ignore) { } }